summaryrefslogtreecommitdiff
path: root/src/dictionary/DicCsvReader.cpp
diff options
context:
space:
mode:
authorJedidiah Barber <contact@jedbarber.id.au>2021-07-14 11:49:10 +1200
committerJedidiah Barber <contact@jedbarber.id.au>2021-07-14 11:49:10 +1200
commitd24f813f3f2a05c112e803e4256b53535895fc98 (patch)
tree601e6ae9a1cd44bcfdcf91739a5ca36aedd827c9 /src/dictionary/DicCsvReader.cpp
Initial mirror commitHEADmaster
Diffstat (limited to 'src/dictionary/DicCsvReader.cpp')
-rw-r--r--src/dictionary/DicCsvReader.cpp205
1 files changed, 205 insertions, 0 deletions
diff --git a/src/dictionary/DicCsvReader.cpp b/src/dictionary/DicCsvReader.cpp
new file mode 100644
index 0000000..ec80225
--- /dev/null
+++ b/src/dictionary/DicCsvReader.cpp
@@ -0,0 +1,205 @@
+#include "DicCsvReader.h"
+#include "Dictionary.h"
+#include "CardPack.h"
+#include "DicRecord.h"
+
+/**
+ * Creates a reader bound to the dictionary that readDict() fills.
+ * The pointer is only stored here; nothing is read yet.
+ */
+DicCsvReader::DicCsvReader( Dictionary* aDict )
+    : m_dict( aDict )
+{
+}
+
+/**
+ * Creates a reader without a target dictionary.
+ * In this state readDict() returns immediately without reading anything.
+ */
+DicCsvReader::DicCsvReader()
+    : m_dict( NULL )
+{
+}
+
+/**
+ * Replaces the content of the attached dictionary with the records of a CSV file.
+ *
+ * The existing field/pack configuration and records are removed, the file is
+ * parsed with the given import settings, the parsed records and one field per
+ * column are added, and two card packs are created: field 0 asked with field 1
+ * answered, and the reverse. All further fields are appended as answers in both
+ * packs.
+ *
+ * Note that the dictionary is emptied before the file is opened, so it stays
+ * empty if opening fails.
+ *
+ * @param aCsvFilePath Path of the CSV file to read.
+ * @param aImportData Import settings (text codec, first line, separators, ...).
+ * @return The QFile error if the file cannot be opened, otherwise QFile::NoError.
+ *         QFile::NoError is also returned when no dictionary is attached.
+ */
+QFile::FileError DicCsvReader::readDict( const QString aCsvFilePath, const CsvImportData& aImportData )
+{
+    if( !m_dict )
+        return QFile::NoError;
+
+    initData( aImportData );
+
+    // Start from an empty dictionary
+    m_dict->clearFieldPackConfig();
+    if( m_dict->entriesNum() > 0 )
+        m_dict->removeRecords( 0, m_dict->entriesNum() );
+
+    QFile csvFile( aCsvFilePath );
+    const bool opened = csvFile.open( QIODevice::ReadOnly | QFile::Text );   // \r\n --> \n
+    if( !opened )
+        return csvFile.error();
+    QTextStream csvStream( &csvFile );
+    csvStream.setCodec( m_params.textCodec );
+
+    // Skip the rows that precede the first line to import
+    for( int rowNum = 1; !csvStream.atEnd() && rowNum < m_params.fromLine; ++rowNum )
+        csvStream.readLine();
+
+    const int columnsNum = readLines( csvStream );
+    csvFile.close();
+
+    // Hand the parsed records over to the dictionary
+    for( int recIx = 0; recIx < m_entries.size(); ++recIx )
+        m_dict->addRecord( m_entries.at( recIx ) );
+
+    // One dictionary field per column; a column without a header name is named by its 1-based number
+    for( int col = 0; col < columnsNum; ++col )
+    {
+        const QString headerName = m_fieldNames.value( col );
+        m_dict->addField( headerName.isEmpty() ? QString::number( col + 1 ) : headerName );
+    }
+
+    // Two packs: first field 0 -> field 1, then field 1 -> field 0.
+    // NOTE(review): fields 0 and 1 are requested even when fewer than two columns
+    // were read -- confirm that Dictionary::field() tolerates a missing index.
+    for( int qstIx = 0; qstIx < 2; ++qstIx )
+    {
+        CardPack* pack = new CardPack( m_dict );
+        pack->setQstField( m_dict->field( qstIx ) );
+
+        QList<const Field*> ansFields;
+        ansFields << m_dict->field( 1 - qstIx );     // the other one of the first two fields
+        for( int i = 2; i < m_dict->fieldsNum(); i++ )
+            ansFields << m_dict->field( i );
+        pack->setAnsFields( ansFields );
+
+        m_dict->addCardPack( pack );
+    }
+
+    return QFile::NoError;
+}
+
+/**
+ * Parses CSV text (for example pasted text) into records.
+ *
+ * The records are only returned when a header line with field names was read.
+ * Without the required header they are not real entries: such text cannot be
+ * parsed when pasting and must be ignored.
+ * The read field names are returned with fieldNames().
+ *
+ * @param aCsvEntries CSV text to parse.
+ * @param aImportData Import settings.
+ * @return The parsed records, or an empty list if no field names were read.
+ *         NOTE(review): the returned records appear to become the caller's
+ *         responsibility -- confirm against the callers.
+ */
+QList<DicRecord*> DicCsvReader::readEntries( QString aCsvEntries, const CsvImportData& aImportData )
+{
+    initData( aImportData );
+    QTextStream inStream( &aCsvEntries );
+    readLines( inStream );
+    if( !m_fieldNames.empty() )
+        return m_entries;
+
+    // No required header: the parsed records are discarded. Within this file nothing
+    // else refers to them, and the next readLines() only clears the list, so delete
+    // them here instead of leaking them.
+    foreach( DicRecord* entry, m_entries )
+        delete entry;
+    m_entries.clear();
+    return QList<DicRecord*>();
+}
+
+/**
+ * Stores the import settings and builds the regular expressions used by readFields().
+ *
+ * The field separators and the text delimiter are meant literally, so they are
+ * escaped with QRegExp::escape() before being put into a pattern. Otherwise a
+ * separator such as "|" or "." or a delimiter such as '|' would change the
+ * meaning of the pattern, and a pattern that matches the empty string would
+ * keep the field parser from advancing.
+ *
+ * @param aImportData Import settings to use for the following read operations.
+ */
+void DicCsvReader::initData( const CsvImportData& aImportData )
+{
+    m_params = aImportData;
+
+    // Construct regexp for the separators
+    const QString separators = QRegExp::escape( m_params.fieldSeparators );
+    QString fieldSepRxStr;
+    switch( m_params.fieldSeparationMode )
+    {
+        case EFieldSeparatorAnyCharacter:       // any single one of the separator characters
+            fieldSepRxStr = QString("[") + separators + "]";
+            break;
+        case EFieldSeparatorAnyCombination:     // a run of separator characters counts as one separator
+            fieldSepRxStr = QString("[") + separators + "]+";
+            break;
+        case EFieldSeparatorExactString:        // the separator string as a whole
+            fieldSepRxStr = separators;
+            break;
+    }
+    m_fieldSepRx = QRegExp( fieldSepRxStr );
+
+    const QChar delimChar = m_params.textDelimiter;
+    const QString delim = QRegExp::escape( QString( delimChar ) );
+    // End of text chunk: the next field separator or text delimiter
+    m_chunkEndRx = QRegExp( QString("(") + fieldSepRxStr + "|" + delim + ")" );
+    // Single text delimiter: one that is neither preceded nor followed by another delimiter
+    m_singleDelimiterRx = QRegExp( QString("([^") + delim + "]|^)" + delim + "([^" + delim + "]|$)" );
+}
+
+/**
+ * Reads the remaining lines of the stream into m_fieldNames and m_entries.
+ *
+ * If the import settings mark the first line as a header, that line is read as
+ * the field names; one leading comment character is stripped from it first.
+ * Every following line becomes one record, including lines that yield no
+ * fields. A value whose column has no (or an empty) header name is stored
+ * under the 1-based column number, the same name readDict() gives such a column.
+ *
+ * @param aInStream Stream positioned at the first line to process.
+ * @return The largest number of fields found in a single record line (0 if none).
+ */
+int DicCsvReader::readLines( QTextStream& aInStream )
+{
+    // Read field names from the header
+    m_fieldNames.clear();
+    if( m_params.firstLineIsHeader && !aInStream.atEnd() )
+    {
+        QString header = aInStream.readLine();
+        // Strip only the leading comment mark: the same character inside a field name must stay
+        if( header.startsWith( m_params.commentChar ) )
+            header.remove( 0, 1 );
+        m_fieldNames = readFields( header.trimmed() );
+    }
+
+    // Read lines and create entries.
+    // Records of a previous read are not deleted here: readDict() adds them to the
+    // dictionary and readEntries() returns them to its caller.
+    int fieldsNum = 0;
+    m_entries.clear();
+    while( !aInStream.atEnd() )
+    {
+        const QString line = aInStream.readLine();
+        const QStringList fields = readFields( line.trimmed() );
+        DicRecord* entry = new DicRecord();
+        for( int i = 0; i < fields.size(); i++ )
+        {
+            QString name = m_fieldNames.value( i );
+            if( name.isEmpty() )    // no header name for this column
+                name = QString::number( i+1 );
+            entry->setField( name, fields.at( i ) );
+        }
+        m_entries << entry;     // Add even empty entries (without fields)
+        if( fields.size() > fieldsNum )
+            fieldsNum = fields.size();
+    }
+    return fieldsNum;
+}
+
+/**
+ * Splits one CSV line into its fields.
+ *
+ * A line that starts with the comment character yields no fields. Text enclosed
+ * in text delimiters is not searched for field separators; a doubled delimiter
+ * in a field stands for one delimiter character. Field values are trimmed, and
+ * an empty text after the last separator does not produce a field. If
+ * colsToImport is positive, only that many leading fields are returned.
+ *
+ * The loop always advances: a separator pattern that matches the empty string
+ * (for example an empty separator string) is not treated as a separator, so
+ * such a configuration yields the line as a single field instead of never
+ * terminating.
+ *
+ * @param aCsvLine Line to split, without the line terminator.
+ * @return The field values in column order.
+ */
+QStringList DicCsvReader::readFields( const QString aCsvLine )
+{
+    QChar comment = m_params.commentChar;
+    if( !comment.isNull() && aCsvLine.startsWith( comment ) )
+        return QStringList();   // FUTURE FEATURE: read and mark as disabled
+
+    QChar delim = m_params.textDelimiter;
+    QStringList fields;
+    int curPos = 0;
+    QString curText;
+    while( curPos < aCsvLine.length() )
+    {
+        QChar curChar = aCsvLine[curPos];
+        if( curChar == delim )  // Text delimiter - Process the text until the next delimiter
+        {
+            // The match starts one character before the closing single delimiter,
+            // i.e. at the last character of the delimited text.
+            int quoteEnd = aCsvLine.indexOf( m_singleDelimiterRx, curPos + 1 );
+            if( quoteEnd == -1 )    // End of line
+                quoteEnd = aCsvLine.length();
+            curText += aCsvLine.mid( curPos+1, quoteEnd-curPos );
+            curPos = quoteEnd + 2;  // move beyond the delimiter
+        }
+        else if( m_fieldSepRx.indexIn( aCsvLine, curPos ) == curPos
+                 && m_fieldSepRx.matchedLength() > 0 )  // Field separator - End of field
+        {
+            curPos += m_fieldSepRx.matchedLength();
+            fields << unescapeString( curText.trimmed() );
+            curText.clear();
+        }
+        else    // Chunk of normal text. Process until next field separator or text delimiter.
+        {
+            int chunkEnd = aCsvLine.indexOf( m_chunkEndRx, curPos );
+            if( chunkEnd == -1 )    // End of line
+                chunkEnd = aCsvLine.length();
+            else if( chunkEnd <= curPos )   // Zero-length match: take one character to keep advancing
+                chunkEnd = curPos + 1;
+            curText += aCsvLine.mid( curPos, chunkEnd-curPos );
+            curPos = chunkEnd;
+        }
+    }
+    if( !curText.isEmpty() )    // last Field
+        fields << unescapeString( curText.trimmed() );
+
+    if( m_params.colsToImport > 0 )     // Take only needed imported fields
+        fields = fields.mid( 0, m_params.colsToImport );
+
+    return fields;
+}
+
+/**
+ * Collapses every doubled text delimiter in the string into a single one.
+ *
+ * @param aString Field text that may contain doubled (escaped) delimiters.
+ * @return The text with each pair of delimiters replaced by one delimiter.
+ */
+QString DicCsvReader::unescapeString( QString aString )
+{
+    const QString single = m_params.textDelimiter;
+    const QString doubled = single + single;
+    return aString.replace( doubled, single );
+}