Simple autotagging

From OpenKM Documentation
Revision as of 13:02, 24 May 2013 by Pavila (talk | contribs)

Jump to: navigation, search

The script assigns keywords to a document based on database metadata values found in the document's content.

Description:

  • There is a database metadata table, doc_type, which holds the document type values.
  • There is a script which looks for those values in the document's extracted text.
  • There is an automation task - based on scripting - executed after a document is uploaded, which tags the document.

Database metadata:

-- DOCS TYPE
-- Defines the virtual columns of the 'doc_type' metadata table:
--   col00 -> dt_id           (document type identifier)
--   col01 -> dt_description  (document type description, used as the keyword)
-- Existing definitions are removed first so the script can be re-run.
DELETE FROM OKM_DB_METADATA_TYPE WHERE DMT_TABLE='doc_type';
INSERT INTO OKM_DB_METADATA_TYPE (DMT_TABLE, DMT_REAL_COLUMN, DMT_TYPE, DMT_VIRTUAL_COLUMN) VALUES ('doc_type', 'col00', 'text', 'dt_id');
INSERT INTO OKM_DB_METADATA_TYPE (DMT_TABLE, DMT_REAL_COLUMN, DMT_TYPE, DMT_VIRTUAL_COLUMN) VALUES ('doc_type', 'col01', 'text', 'dt_description');

-- VALUES
-- Remove previously registered values as well; otherwise re-running this
-- script would insert every document type a second time.
DELETE FROM OKM_DB_METADATA_VALUE WHERE DMV_TABLE='doc_type';
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','1','Article');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','25','Audio');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','6','Broker Note');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','8','Case Study');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','5','Company Information');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','4','Conference Report');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','3','Course Material');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','7','Dissertation');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','12','Form');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','11','Image');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','26','Infographics');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','16','Interview');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','17','Presentation');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','19','Report');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','14','Video');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','0','_Other');

Code:

import com.openkm.dao.bean.NodeDocumentVersion;
import com.openkm.dao.bean.NodeDocument;
import com.openkm.extractor.TextExtractorWork;
import com.openkm.api.OKMRepository;
import com.openkm.dao.NodeDocumentVersionDAO;
import com.openkm.dao.NodeDocumentDAO;
import org.hibernate.Session;
import com.openkm.dao.HibernateUtil;
import org.hibernate.Query;
import com.openkm.dao.bean.DatabaseMetadataValue;
import com.openkm.module.db.stuff.DbSessionManager;
import com.openkm.api.OKMProperty;
import com.openkm.dao.HibernateUtil;
import com.openkm.core.DatabaseException;
import org.hibernate.HibernateException;

// Autotagging script: extracts the text of the document identified by 'uuid'
// and adds, as a keyword, every 'doc_type' description found in that text.
// NOTE(review): 'uuid' is not defined in this script; it is presumably bound
// by the automation task that runs it - confirm against the automation setup.
String systemToken = DbSessionManager.getInstance().getSystemToken();

// Get path
String docPath = OKMRepository.getInstance().getNodePath(null, uuid);

// Get doc version uuid
NodeDocumentVersion currentVersion = NodeDocumentVersionDAO.getInstance().findCurrentVersion(uuid);
String docVerUuid = currentVersion.getUuid();

// Document extractor
TextExtractorWork tew = new TextExtractorWork();
tew.setDocUuid(uuid);
tew.setDocPath(docPath);
tew.setDocVerUuid(docVerUuid);

// Execute extractor
NodeDocumentDAO.getInstance().textExtractorHelper(tew);

// Get extracted text; a document may yield no text at all, so fall back to
// an empty string instead of failing with a NullPointerException.
NodeDocument docNode = NodeDocumentDAO.getInstance().findByPk(uuid);
String extracted = docNode.getText();
String text = (extracted == null) ? "" : extracted.toLowerCase();

// Looking for metadata description values. Only rows of the 'doc_type' table
// are relevant; without the filter, values of every other metadata table
// would also be used as keywords.
// NOTE(review): 'table' is assumed to be the mapped property name of
// DatabaseMetadataValue - verify against the bean.
String qs = "from DatabaseMetadataValue dmv where dmv.table=:table";
Session session = HibernateUtil.getSessionFactory().openSession();

try {
  Query q = session.createQuery(qs);
  q.setString("table", "doc_type");
  List ret = q.list();
  
  for (DatabaseMetadataValue dmv : ret ) {
    String description = dmv.getCol01();
    
    // Skip rows without a description: null would fail on toLowerCase(),
    // and an empty string is "contained" in every text.
    if (description == null || description.length() == 0) {
      continue;
    }
    
    String keyword = description.toLowerCase();
    
    if (text.contains(keyword)) {
      OKMProperty.getInstance().addKeyword(systemToken, docPath, keyword);
    }
  }
} catch (HibernateException e) {
  throw new DatabaseException(e.getMessage(), e);
} finally {
  // Always release the Hibernate session, even when tagging fails.
  HibernateUtil.close(session);
}

Example

Register database metadata values:

Okm user guide 451.png


Register automation task:

Okm user guide 452.png


Okm user guide 453.png


Okm user guide 455.png


Autotagging an uploaded file:

Okm user guide 455.png


Okm user guide 456.png