Crontab xml importer
Imports files together with metadata values read from XML files. This is a crontab task based on a Java class (packaged as a .jar).
When the crontab task runs, it first imports all the files found in /home/openkm/import/Output and deletes each one after it has been imported. It then parses the XML files located in /home/openkm/import/logfile and adds the corresponding metadata to the imported documents. Finally, the parsed XML files are deleted as well.
Description:
- Files to be imported are stored in /home/openkm/import/Output on the OpenKM server. (Variable SYSTEM_FOLDER + "/Output").
- XML files with metadata values are stored in /home/openkm/import/logfile on the OpenKM server. (Variable SYSTEM_FOLDER + "/logfile").
- Files are imported into the /okm:root/import repository folder. (Variable OPENKM_FOLDER).
Metadata ( property groups )
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE property-groups PUBLIC "-//OpenKM//DTD Property Groups 2.1//EN"
"http://www.openkm.com/dtd/property-groups-2.1.dtd">
<property-groups>
<property-group label="Data" name="okg:data">
<input label="Folio" name="okp:data.folio" type="text"/>
<input label="Rut" name="okp:data.rut" type="text"/>
<input label="Placa patente" name="okp:data.placa" type="text"/>
<input label="Nombre" name="okp:data.nombres" type="text"/>
<input label="Apellido paterno" name="okp:data.apellido_padre" type="text"/>
<input label="Apellido materno" name="okp:data.apellido_madre" type="text"/>
<select label="Año contable" name="okp:data.year" type="simple">
<option value="2012" label="2012" />
<option value="2013" label="2013" />
</select>
<input label="Tipo documento" name="okp:data.documento" type="text"/>
</property-group>
</property-groups>
XML structure
<?xml version="1.0" encoding="UTF-16"?>
<INDEX_LOG>
<Batch BatchID="2013-04-16">
<File>
<Filename>10201200365212 - 01.pdf</Filename>
<Fields>
<Field>
<Name>Folio</Name>
<Value>10201200365245</Value>
</Field>
<Field>
<Name>Rut</Name>
<Value>9419475-JK</Value>
</Field>
<Field>
<Name>Placa Patente</Name>
<Value>XG412190</Value>
</Field>
<Field>
<Name>Nombres</Name>
<Value>JOSEP</Value>
</Field>
<Field>
<Name>Apellido Paterno</Name>
<Value>LLORT</Value>
</Field>
<Field>
<Name>Apellido Materno</Name>
<Value>TELLA</Value>
</Field>
<Field>
<Name>Año</Name>
<Value>2012</Value>
</Field>
<Field>
<Name>Tipo de Documento</Name>
<Value>Comprobante de Permiso</Value>
</Field>
</Fields>
</File>
etc....
</Batch>
</INDEX_LOG>
Java class
package com.openkm.cron;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.openkm.api.OKMDocument;
import com.openkm.api.OKMPropertyGroup;
import com.openkm.api.OKMSearch;
import com.openkm.automation.AutomationException;
import com.openkm.bean.Document;
import com.openkm.bean.PropertyGroup;
import com.openkm.bean.QueryResult;
import com.openkm.core.AccessDeniedException;
import com.openkm.core.DatabaseException;
import com.openkm.core.FileSizeExceededException;
import com.openkm.core.ItemExistsException;
import com.openkm.core.LockException;
import com.openkm.core.NoSuchGroupException;
import com.openkm.core.NoSuchPropertyException;
import com.openkm.core.ParseException;
import com.openkm.core.PathNotFoundException;
import com.openkm.core.RepositoryException;
import com.openkm.core.UnsupportedMimeTypeException;
import com.openkm.core.UserQuotaExceededException;
import com.openkm.core.VirusDetectedException;
import com.openkm.dao.bean.QueryParams;
import com.openkm.extension.core.ExtensionException;
import com.openkm.module.db.stuff.DbSessionManager;
/**
 * Crontab task that imports PDF files into OpenKM and then attaches metadata
 * to the imported documents from XML index logs.
 *
 * <p>Files are read from {@code SYSTEM_FOLDER + "/Output"} and created under
 * {@code OPENKM_FOLDER}. Metadata is read from files in
 * {@code SYSTEM_FOLDER + "/logfile"} whose name ends with {@code indexlog.xml}
 * and written to the {@code okg:data} property group. Source files are deleted
 * from disk once they have been processed.
 */
public class XMLImporter {
    private static final String SYSTEM_FOLDER = "/home/openkm/import";
    private static final String OPENKM_FOLDER = "/okm:root/import";

    // File name suffixes (matched case-insensitively) selecting what gets processed.
    private static final String PDF_SUFFIX = ".pdf";
    private static final String INDEX_LOG_SUFFIX = "indexlog.xml";

    // Element names of the XML index log.
    private static final String ELEMENT_FILE = "File";
    private static final String ATTRIBUTE_FILENAME = "Filename";
    private static final String ATTRIBUTE_FIELDS = "Fields";
    private static final String ATTRIBUTE_FIELD = "Field";
    private static final String ATTRIBUTE_NAME = "Name";
    private static final String ATTRIBUTE_VALUE = "Value";

    // Field names as they appear in the <Name> element (compared ignoring case).
    private static final String FIELD_NAME_FOLIO = "folio";
    private static final String FIELD_NAME_RUT = "rut";
    private static final String FIELD_NAME_PLACA_PATENTE = "placa patente";
    private static final String FIELD_NAME_NOMBRES = "nombres";
    private static final String FIELD_NAME_APELLIDO_PATERNO = "apellido paterno";
    private static final String FIELD_NAME_APELLIDO_MATERNO = "apellido materno";
    private static final String FIELD_NAME_ANO = "año";
    private static final String FIELD_NAME_TIPO_DOCUMENTO = "tipo de documento";

    private static final String PROPERTY_GROUP = "okg:data";

    // XML field name -> OpenKM property name. Every property listed here is
    // always written, with an empty string when the XML does not provide it.
    private static final String[][] FIELD_TO_PROPERTY = {
        { FIELD_NAME_FOLIO, "okp:data.folio" },
        { FIELD_NAME_RUT, "okp:data.rut" },
        { FIELD_NAME_PLACA_PATENTE, "okp:data.placa" },
        { FIELD_NAME_NOMBRES, "okp:data.nombres" },
        { FIELD_NAME_APELLIDO_PATERNO, "okp:data.apellido_padre" },
        { FIELD_NAME_APELLIDO_MATERNO, "okp:data.apellido_madre" },
        { FIELD_NAME_ANO, "okp:data.year" },
        { FIELD_NAME_TIPO_DOCUMENTO, "okp:data.documento" }
    };

    /**
     * Command line entry point; runs {@link #cronTask(String[])} and prints its result.
     *
     * @param args passed through to {@link #cronTask(String[])}
     */
    public static void main(String[] args) {
        System.out.println(cronTask(args));
    }

    /**
     * Runs the file import followed by the metadata import.
     *
     * <p>Any checked exception raised by either step is printed with its stack
     * trace and the task ends; if the file import fails the metadata import is
     * not attempted.
     *
     * @param systemToken arguments supplied by the caller; not read by this implementation
     * @return always an empty string
     */
    public static String cronTask(String[] systemToken) {
        try {
            importFiles();
            importMetadata();
        } catch (UnsupportedMimeTypeException e) {
            e.printStackTrace();
        } catch (FileSizeExceededException e) {
            e.printStackTrace();
        } catch (UserQuotaExceededException e) {
            e.printStackTrace();
        } catch (VirusDetectedException e) {
            e.printStackTrace();
        } catch (ItemExistsException e) {
            e.printStackTrace();
        } catch (PathNotFoundException e) {
            e.printStackTrace();
        } catch (AccessDeniedException e) {
            e.printStackTrace();
        } catch (RepositoryException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (DatabaseException e) {
            e.printStackTrace();
        } catch (ExtensionException e) {
            e.printStackTrace();
        } catch (AutomationException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (NoSuchGroupException e) {
            e.printStackTrace();
        } catch (LockException e) {
            e.printStackTrace();
        } catch (NoSuchPropertyException e) {
            e.printStackTrace();
        }
        return "";
    }

    /**
     * Parses every {@code *indexlog.xml} file in {@code SYSTEM_FOLDER + "/logfile"},
     * writes the metadata of each {@code <File>} entry to the matching OpenKM
     * document and deletes the XML file afterwards.
     *
     * <p>Entries without a file name are skipped. If the folder cannot be listed
     * a message is printed and nothing else happens.
     */
    public static void importMetadata() throws ParserConfigurationException, SAXException, IOException, ParseException,
            RepositoryException, DatabaseException, PathNotFoundException, NoSuchGroupException, LockException, AccessDeniedException,
            ExtensionException, NoSuchPropertyException, UnsupportedMimeTypeException, FileSizeExceededException,
            UserQuotaExceededException, VirusDetectedException, ItemExistsException, AutomationException {
        File[] candidates = new File(SYSTEM_FOLDER + "/logfile").listFiles();
        if (candidates == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            System.out.println(SYSTEM_FOLDER + "/logfile - cannot list folder");
            return;
        }

        for (File xmlFile : candidates) {
            if (!xmlFile.isFile() || !endsWithIgnoreCase(xmlFile.getName(), INDEX_LOG_SUFFIX)) {
                continue;
            }

            org.w3c.dom.Document xmlDoc = parseXml(xmlFile);
            NodeList fileNodes = xmlDoc.getElementsByTagName(ELEMENT_FILE);
            for (int x = 0; x < fileNodes.getLength(); x++) {
                Node fileNode = fileNodes.item(x);
                if (!(fileNode instanceof Element)) {
                    continue;
                }
                Element fileElement = (Element) fileNode;
                String fileName = childText(fileElement, ATTRIBUTE_FILENAME);
                if (fileName == null || fileName.equals("")) {
                    continue;
                }
                attachMetadata(fileName, readProperties(fileElement));
            }

            deleteProcessed(xmlFile);
        }
    }

    /**
     * Creates an OpenKM document under {@code OPENKM_FOLDER} for every
     * {@code .pdf} file in {@code SYSTEM_FOLDER + "/Output"} and deletes the
     * file from disk once it has been created.
     *
     * <p>If the folder cannot be listed a message is printed and nothing else happens.
     */
    public static void importFiles() throws UnsupportedMimeTypeException, FileSizeExceededException, UserQuotaExceededException,
            VirusDetectedException, ItemExistsException, PathNotFoundException, AccessDeniedException, RepositoryException, IOException,
            DatabaseException, ExtensionException, AutomationException {
        String systemToken = DbSessionManager.getInstance().getSystemToken();

        File[] candidates = new File(SYSTEM_FOLDER + "/Output").listFiles();
        if (candidates == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            System.out.println(SYSTEM_FOLDER + "/Output - cannot list folder");
            return;
        }

        for (File file : candidates) {
            if (!file.isFile() || !endsWithIgnoreCase(file.getName(), PDF_SUFFIX)) {
                continue;
            }

            Document doc = new Document();
            doc.setPath(OPENKM_FOLDER + "/" + file.getName());

            FileInputStream fis = new FileInputStream(file);
            try {
                OKMDocument.getInstance().create(systemToken, doc, fis);
            } finally {
                // Release the handle even when create() fails, and before the
                // delete below (an open handle blocks deletion on some platforms).
                fis.close();
            }

            deleteProcessed(file);
        }
    }

    /** Parses the given XML file into a normalized, namespace-aware DOM document. */
    private static org.w3c.dom.Document parseXml(File xmlFile) throws ParserConfigurationException, SAXException, IOException {
        // NOTE(review): the parser accepts DOCTYPE declarations / external entities;
        // consider hardening it if the logfile folder can receive untrusted XML.
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setNamespaceAware(true);
        dbf.setAttribute("http://xml.org/sax/features/namespaces", Boolean.TRUE);
        DocumentBuilder db = dbf.newDocumentBuilder();
        org.w3c.dom.Document xmlDoc = db.parse(xmlFile);
        xmlDoc.getDocumentElement().normalize();
        return xmlDoc;
    }

    /**
     * Builds the property map for one {@code <File>} element. All known
     * properties start as empty strings; recognized fields overwrite them,
     * the last occurrence of a field winning.
     */
    private static Map<String, String> readProperties(Element fileElement) {
        Map<String, String> properties = new HashMap<String, String>();
        for (String[] mapping : FIELD_TO_PROPERTY) {
            properties.put(mapping[1], "");
        }

        // Only the first <Fields> element is considered.
        Element fieldsElement = firstElement(fileElement, ATTRIBUTE_FIELDS);
        if (fieldsElement == null) {
            return properties;
        }

        NodeList fieldList = fieldsElement.getElementsByTagName(ATTRIBUTE_FIELD);
        for (int y = 0; y < fieldList.getLength(); y++) {
            Node fieldNode = fieldList.item(y);
            if (!(fieldNode instanceof Element)) {
                continue;
            }
            Element fieldElement = (Element) fieldNode;
            String name = childText(fieldElement, ATTRIBUTE_NAME);
            if (name == null) {
                continue;
            }
            String property = propertyFor(name);
            if (property != null) {
                String value = childText(fieldElement, ATTRIBUTE_VALUE);
                properties.put(property, value == null ? "" : value);
            }
        }
        return properties;
    }

    /**
     * Searches for the document named {@code fileName}; when exactly one
     * matches, makes sure it has the property group and stores the properties.
     * Prints the outcome for the file name to standard output.
     */
    private static void attachMetadata(String fileName, Map<String, String> properties) throws ParserConfigurationException,
            SAXException, IOException, ParseException, RepositoryException, DatabaseException, PathNotFoundException,
            NoSuchGroupException, LockException, AccessDeniedException, ExtensionException, NoSuchPropertyException,
            UnsupportedMimeTypeException, FileSizeExceededException, UserQuotaExceededException, VirusDetectedException,
            ItemExistsException, AutomationException {
        QueryParams queryParams = new QueryParams();
        queryParams.setDomain(QueryParams.DOCUMENT);
        queryParams.setName(fileName);

        // NOTE(review): importFiles() uses the system token while the calls below
        // pass null; confirm null resolves to a session with enough rights in cron.
        Collection<QueryResult> results = OKMSearch.getInstance().find(null, queryParams);
        if (results.size() > 1) {
            // Ambiguous match: refuse to guess which document the metadata belongs to.
            System.out.println(fileName + " - error");
            return;
        }
        if (results.isEmpty()) {
            System.out.println(fileName + " - not found");
            return;
        }

        QueryResult match = results.iterator().next();
        if (match.getDocument() == null) {
            return;
        }
        System.out.println(fileName + " -ok");
        String docPath = match.getDocument().getPath();

        boolean hasGroup = false;
        for (PropertyGroup group : OKMPropertyGroup.getInstance().getGroups(null, docPath)) {
            if (group.getName().equals(PROPERTY_GROUP)) {
                hasGroup = true;
                break;
            }
        }
        if (!hasGroup) {
            OKMPropertyGroup.getInstance().addGroup(null, docPath, PROPERTY_GROUP);
        }

        OKMPropertyGroup.getInstance().setPropertiesSimple(null, docPath, PROPERTY_GROUP, properties);
    }

    /** Returns the OpenKM property mapped to the XML field name, or null when the field is unknown. */
    private static String propertyFor(String fieldName) {
        for (String[] mapping : FIELD_TO_PROPERTY) {
            // equalsIgnoreCase does not depend on the default locale, unlike toLowerCase().
            if (mapping[0].equalsIgnoreCase(fieldName)) {
                return mapping[1];
            }
        }
        return null;
    }

    /** Returns the first descendant element with the given tag name, or null when there is none. */
    private static Element firstElement(Element parent, String tagName) {
        Node node = parent.getElementsByTagName(tagName).item(0);
        return (node instanceof Element) ? (Element) node : null;
    }

    /** Returns the trimmed text of the first descendant element with the given tag name, or null when absent. */
    private static String childText(Element parent, String tagName) {
        Element child = firstElement(parent, tagName);
        if (child == null || child.getTextContent() == null) {
            return null;
        }
        return child.getTextContent().trim();
    }

    /** Tells whether {@code text} ends with {@code suffix}, ignoring case and the default locale. */
    private static boolean endsWithIgnoreCase(String text, String suffix) {
        return text.regionMatches(true, text.length() - suffix.length(), suffix, 0, suffix.length());
    }

    /** Deletes a processed file, reporting on standard output when the deletion fails. */
    private static void deleteProcessed(File file) {
        if (!file.delete()) {
            System.out.println(file.getPath() + " - could not be deleted");
        }
    }
}
Example
Create .jar file:
Right click the XMLImporter.java file and select export option.
Select .jar file and click next button.
Register XMLImporter.jar in crontab tasks:
Register metadata:
Files on the OpenKM server in the /home/openkm/import/logfile folder
Files on the OpenKM server in the /home/openkm/import/Output folder
Imported documents view from OpenKM desktop: