diff --git a/excel/pom.xml b/excel/pom.xml index 7f8e8f09f..a87f50f4c 100644 --- a/excel/pom.xml +++ b/excel/pom.xml @@ -40,7 +40,7 @@ under the License. org.apache.poi poi-ooxml - 4.0.1 + 4.1.0 commons-logging diff --git a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java index 97ba50f6a..c4f16fb14 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java +++ b/excel/src/main/java/org/apache/metamodel/excel/DefaultSpreadsheetReaderDelegate.java @@ -18,6 +18,7 @@ */ package org.apache.metamodel.excel; +import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.stream.Collectors; @@ -41,6 +42,7 @@ import org.apache.metamodel.util.FileHelper; import org.apache.metamodel.util.Resource; import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; @@ -51,12 +53,12 @@ * The default {@link SpreadsheetReaderDelegate}, which uses POI's main user * model to read spreadsheets: the Workbook class. */ -final class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegate { +class DefaultSpreadsheetReaderDelegate implements SpreadsheetReaderDelegate { private static final Logger logger = LoggerFactory.getLogger(DefaultSpreadsheetReaderDelegate.class); - private final Resource _resource; - private final ExcelConfiguration _configuration; + protected final Resource _resource; + protected final ExcelConfiguration _configuration; public DefaultSpreadsheetReaderDelegate(Resource resource, ExcelConfiguration configuration) { _resource = resource; @@ -64,7 +66,7 @@ public DefaultSpreadsheetReaderDelegate(Resource resource, ExcelConfiguration co } @Override - public Schema createSchema(String schemaName) { + public Schema createSchema(String schemaName) throws Exception { final MutableSchema schema = new MutableSchema(schemaName); final Workbook wb = ExcelUtils.readWorkbook(_resource, true); try { @@ -82,7 +84,7 @@ public Schema createSchema(String schemaName) { } @Override - public DataSet executeQuery(Table table, List columns, int maxRows) { + public DataSet executeQuery(Table table, List columns, int maxRows) throws Exception { final Workbook wb = ExcelUtils.readWorkbook(_resource, true); final Sheet sheet = wb.getSheet(table.getName()); @@ -129,6 +131,7 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) { } final int columnNameLineNumber = _configuration.getColumnNameLineNumber(); + final ColumnType[] columnTypes = getColumnTypes(sheet, row); if (columnNameLineNumber == ExcelConfiguration.NO_COLUMN_NAME_LINE) { // get to the first non-empty line (no matter if lines are skipped @@ -149,7 +152,7 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) { for (int j = offset; j < row.getLastCellNum(); j++) { final ColumnNamingContext namingContext = new ColumnNamingContextImpl(table, null, j); final Column column = new MutableColumn(columnNamingSession.getNextColumnName(namingContext), - ColumnType.STRING, table, j, true); + columnTypes[j], table, j, true); table.addColumn(column); } } @@ -169,13 +172,78 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) { } if (hasColumns) { - createColumns(table, wb, row); + createColumns(table, wb, row, columnTypes); } } return table; } + protected ColumnType[] getColumnTypes(final Sheet sheet, final Row row) { + final Iterator data = ExcelUtils.getRowIterator(sheet, _configuration, false); + final int rowLength = row.getLastCellNum(); + final ColumnType[] columnTypes = new ColumnType[rowLength]; + if (_configuration.isDetectColumnTypes()) { + + int numberOfLinesToScan = _configuration.getNumberOfLinesToScan(); + + while (data.hasNext() && numberOfLinesToScan-- > 0) { + final Row currentRow = data.next(); + if (currentRow.getRowNum() < _configuration.getColumnNameLineNumber()) { + continue; + } + for (int index = 0; index < rowLength; index++) { + if (currentRow.getLastCellNum() == 0) { + continue; + } + + final ColumnType columnType = columnTypes[index]; + final ColumnType expectedColumnType = getColumnTypeFromRow(currentRow, index); + if (columnType != null) { + if (!columnType.equals(ColumnType.STRING) && !columnType.equals(expectedColumnType)) { + columnTypes[index] = ColumnType.VARCHAR; + } + } else { + columnTypes[index] = expectedColumnType; + } + } + } + } else { + Arrays.fill(columnTypes, ColumnType.STRING); + } + return columnTypes; + } + + protected ColumnType getColumnTypeFromRow(final Row currentRow, int index) { + if (currentRow.getCell(index) == null) { + return ColumnType.STRING; + } else { + switch (currentRow.getCell(index).getCellType()) { + case NUMERIC: + if (DateUtil.isCellDateFormatted(currentRow.getCell(index))) { + return ColumnType.DATE; + } else { + return (currentRow.getCell(index).getNumericCellValue() % 1 == 0) + ? ColumnType.INTEGER : ColumnType.DOUBLE; + } + case BOOLEAN: + return ColumnType.BOOLEAN; + case ERROR: + // fall through + case _NONE: + // fall through + case STRING: + // fall through + case FORMULA: + // fall through + case BLANK: + // fall through + default: + return ColumnType.STRING; + } + } + } + /** * Builds columns based on row/cell values. * @@ -183,7 +251,8 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) { * @param wb * @param row */ - private void createColumns(MutableTable table, Workbook wb, Row row) { + private void createColumns(final MutableTable table, final Workbook wb, final Row row, + final ColumnType[] columTypes) { if (row == null) { logger.warn("Cannot create columns based on null row!"); return; @@ -197,11 +266,17 @@ private void createColumns(MutableTable table, Workbook wb, Row row) { .startColumnNamingSession()) { for (int j = offset; j < rowLength; j++) { final Cell cell = row.getCell(j); - final String intrinsicColumnName = ExcelUtils.getCellValue(wb, cell); + Object cellValue = ExcelUtils.getCellValue(wb, cell); + final String intrinsicColumnName = cellValue == null ? "" : cellValue.toString(); final ColumnNamingContext columnNamingContext = new ColumnNamingContextImpl(table, intrinsicColumnName, j); final String columnName = columnNamingSession.getNextColumnName(columnNamingContext); - final Column column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true); + final Column column; + if (!_configuration.isDetectColumnTypes()) { + column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true); + } else { + column = new MutableColumn(columnName, columTypes[j], table, j, true); + } table.addColumn(column); } } diff --git a/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java b/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java index 4779bb1e7..9bb620e9f 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java +++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelConfiguration.java @@ -33,30 +33,45 @@ public final class ExcelConfiguration extends BaseObject implements Serializable { - private static final long serialVersionUID = 1L; + + private static final long serialVersionUID = 1L; public static final int NO_COLUMN_NAME_LINE = 0; public static final int DEFAULT_COLUMN_NAME_LINE = 1; + private static final int NUMBERS_OF_LINES_TO_SCAN = 1000; + private final int numberOfLinesToScan; private final int columnNameLineNumber; private final ColumnNamingStrategy columnNamingStrategy; private final boolean skipEmptyLines; private final boolean skipEmptyColumns; + private final boolean detectColumnTypes; public ExcelConfiguration() { this(DEFAULT_COLUMN_NAME_LINE, true, false); } public ExcelConfiguration(int columnNameLineNumber, boolean skipEmptyLines, boolean skipEmptyColumns) { - this(columnNameLineNumber, null, skipEmptyLines, skipEmptyColumns); + this(columnNameLineNumber, null, skipEmptyLines, skipEmptyColumns, false, NUMBERS_OF_LINES_TO_SCAN); + } + + public ExcelConfiguration(int columnNameLineNumber, ColumnNamingStrategy columnNamingStrategy, + Boolean skipEmptyLines, Boolean skipEmptyColumns) { + this(columnNameLineNumber, columnNamingStrategy, skipEmptyLines, skipEmptyColumns, false, NUMBERS_OF_LINES_TO_SCAN); + } + + public ExcelConfiguration(int columnNameLineNumber, boolean skipEmptyLines, boolean skipEmptyColumns, boolean detectColumnTypes) { + this(columnNameLineNumber, null, skipEmptyLines, skipEmptyColumns, detectColumnTypes, NUMBERS_OF_LINES_TO_SCAN); } public ExcelConfiguration(int columnNameLineNumber, ColumnNamingStrategy columnNamingStrategy, - boolean skipEmptyLines, boolean skipEmptyColumns) { + boolean skipEmptyLines, boolean skipEmptyColumns, boolean detectColumnTypes, int numberOfLinesToScan) { this.columnNameLineNumber = columnNameLineNumber; this.skipEmptyLines = skipEmptyLines; this.skipEmptyColumns = skipEmptyColumns; this.columnNamingStrategy = columnNamingStrategy; + this.detectColumnTypes = detectColumnTypes; + this.numberOfLinesToScan = numberOfLinesToScan; } /** @@ -102,17 +117,34 @@ public boolean isSkipEmptyColumns() { return skipEmptyColumns; } + /** + * Defines if columns in the excel spreadsheet should be validated on datatypes while + * reading the spreadsheet. + * + * @return a boolean indicating whether or not to validate column types. + */ + public boolean isDetectColumnTypes() { + return detectColumnTypes; + } + @Override protected void decorateIdentity(List identifiers) { identifiers.add(columnNameLineNumber); identifiers.add(skipEmptyLines); identifiers.add(skipEmptyColumns); + identifiers.add(detectColumnTypes); + identifiers.add(numberOfLinesToScan); } @Override public String toString() { return "ExcelConfiguration[columnNameLineNumber=" + columnNameLineNumber + ", skipEmptyLines=" + skipEmptyLines - + ", skipEmptyColumns=" + skipEmptyColumns + "]"; + + ", skipEmptyColumns=" + skipEmptyColumns +", detectColumnTypes=" + + detectColumnTypes + ", numbersOfLinesToScan=" + numberOfLinesToScan + "]"; + } + + public int getNumberOfLinesToScan() { + return numberOfLinesToScan; } } diff --git a/excel/src/main/java/org/apache/metamodel/excel/ExcelInsertBuilder.java b/excel/src/main/java/org/apache/metamodel/excel/ExcelInsertBuilder.java index b584e7693..80c8165f7 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/ExcelInsertBuilder.java +++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelInsertBuilder.java @@ -20,12 +20,7 @@ import java.util.Date; -import org.apache.poi.ss.usermodel.Cell; -import org.apache.poi.ss.usermodel.CellStyle; -import org.apache.poi.ss.usermodel.FillPatternType; -import org.apache.poi.ss.usermodel.Font; -import org.apache.poi.ss.usermodel.HorizontalAlignment; -import org.apache.poi.ss.usermodel.Row; +import org.apache.metamodel.MetaModelException; import org.apache.metamodel.data.Style; import org.apache.metamodel.data.Style.Color; import org.apache.metamodel.data.Style.SizeUnit; @@ -33,8 +28,15 @@ import org.apache.metamodel.insert.AbstractRowInsertionBuilder; import org.apache.metamodel.insert.RowInsertionBuilder; import org.apache.metamodel.schema.Column; +import org.apache.metamodel.schema.ColumnType; import org.apache.metamodel.schema.Table; import org.apache.metamodel.util.LazyRef; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.CellStyle; +import org.apache.poi.ss.usermodel.FillPatternType; +import org.apache.poi.ss.usermodel.Font; +import org.apache.poi.ss.usermodel.HorizontalAlignment; +import org.apache.poi.ss.usermodel.Row; /** * {@link RowInsertionBuilder} for excel spreadsheets. @@ -149,8 +151,33 @@ protected CellStyle fetch() { cell.setCellStyle(cellStyle.get()); } } + validateUpdateType(row); } } + + private void validateUpdateType(final Row original) { + for (int index = 0; index < this.getColumns().length; index++) { + final ColumnType columnType = getColumns()[index].getType(); + if (columnType != null && getValues()[index] != null) { + switch (columnType.getName()) { + case "INTEGER": + try { + Integer.decode(getValues()[index].toString()); + } catch (NumberFormatException ex) { + throw new MetaModelException(original.getCell(index) + + " should be an Integer!"); + } + break; + case "STRING": + // fall through + case "VARCHAR": + // fall through + default: + break; + } + } + } + } /** * Converts a percentage based font size to excel "pt" scale. diff --git a/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java b/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java index 2da6ef39b..9b2b770ef 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java +++ b/excel/src/main/java/org/apache/metamodel/excel/ExcelUtils.java @@ -171,14 +171,14 @@ public static void writeAndCloseWorkbook(ExcelDataContext dataContext, final Wor } - public static String getCellValue(Workbook wb, Cell cell) { + public static Object getCellValue(Workbook wb, Cell cell) { if (cell == null) { return null; } final String cellCoordinate = "(" + cell.getRowIndex() + "," + cell.getColumnIndex() + ")"; - final String result; + final Object result; switch (cell.getCellType()) { case BLANK: @@ -186,7 +186,7 @@ public static String getCellValue(Workbook wb, Cell cell) { result = null; break; case BOOLEAN: - result = Boolean.toString(cell.getBooleanCellValue()); + result = cell.getBooleanCellValue(); break; case ERROR: String errorResult; @@ -237,7 +237,7 @@ public static String getCellValue(Workbook wb, Cell cell) { return result; } - private static String getFormulaCellValue(Workbook wb, Cell cell) { + private static Object getFormulaCellValue(Workbook wb, Cell cell) { // first try with a cached/precalculated value try { double numericCellValue = cell.getNumericCellValue(); @@ -414,13 +414,13 @@ public static Iterator getRowIterator(Sheet sheet, ExcelConfiguration confi */ public static DefaultRow createRow(Workbook workbook, Row row, DataSetHeader header) { final int size = header.size(); - final String[] values = new String[size]; + final Object[] values = new Object[size]; final Style[] styles = new Style[size]; if (row != null) { for (int i = 0; i < size; i++) { final int columnNumber = header.getSelectItem(i).getColumn().getColumnNumber(); final Cell cell = row.getCell(columnNumber); - final String value = ExcelUtils.getCellValue(workbook, cell); + final Object value = ExcelUtils.getCellValue(workbook, cell); final Style style = ExcelUtils.getCellStyle(workbook, cell); values[i] = value; styles[i] = style; diff --git a/excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java b/excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java index 81214fe7b..884c37fd2 100644 --- a/excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java +++ b/excel/src/main/java/org/apache/metamodel/excel/XlsxSpreadsheetReaderDelegate.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -46,6 +47,9 @@ import org.apache.metamodel.util.FileResource; import org.apache.metamodel.util.Resource; import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,17 +61,14 @@ * This implementation is very efficient as it uses SAX XML parsing which does * not bloat memory usage in the same way that POI's user model does. */ -final class XlsxSpreadsheetReaderDelegate implements SpreadsheetReaderDelegate { +final class XlsxSpreadsheetReaderDelegate extends DefaultSpreadsheetReaderDelegate { private static final Logger logger = LoggerFactory.getLogger(XlsxSpreadsheetReaderDelegate.class); - private final Resource _resource; - private final ExcelConfiguration _configuration; private final Map _tableNamesToInternalIds; public XlsxSpreadsheetReaderDelegate(Resource resource, ExcelConfiguration configuration) { - _resource = resource; - _configuration = configuration; + super(resource, configuration); _tableNamesToInternalIds = new ConcurrentHashMap(); } @@ -151,11 +152,30 @@ public void close() throws IOException { } }); } - + private void buildColumns(final MutableTable table, final String relationshipId, final XSSFReader xssfReader) throws Exception { final InputStream sheetData = xssfReader.getSheet(relationshipId); + final Workbook wb = ExcelUtils.readWorkbook(_resource, true); + Sheet sheet = wb.getSheetAt(0); + final Iterator rowIterator = ExcelUtils.getRowIterator(sheet, _configuration, false); + + Row row = null; + if (!rowIterator.hasNext()) { + // no physical rows in sheet + return; + } + + if (_configuration.isSkipEmptyLines()) { + while (row == null && rowIterator.hasNext()) { + row = rowIterator.next(); + } + } else { + row = rowIterator.next(); + } + final Row currentRow = row; + final XlsxRowCallback rowCallback = new XlsxRowCallback() { @Override public boolean row(int rowNumber, List values, List