diff options
author | Jedidiah Barber <contact@jedbarber.id.au> | 2021-07-14 11:49:10 +1200 |
---|---|---|
committer | Jedidiah Barber <contact@jedbarber.id.au> | 2021-07-14 11:49:10 +1200 |
commit | d24f813f3f2a05c112e803e4256b53535895fc98 (patch) | |
tree | 601e6ae9a1cd44bcfdcf91739a5ca36aedd827c9 /src/dictionary/DicCsvReader.cpp |
Diffstat (limited to 'src/dictionary/DicCsvReader.cpp')
-rw-r--r-- | src/dictionary/DicCsvReader.cpp | 205 |
1 files changed, 205 insertions, 0 deletions
diff --git a/src/dictionary/DicCsvReader.cpp b/src/dictionary/DicCsvReader.cpp new file mode 100644 index 0000000..ec80225 --- /dev/null +++ b/src/dictionary/DicCsvReader.cpp @@ -0,0 +1,205 @@ +#include "DicCsvReader.h" +#include "Dictionary.h" +#include "CardPack.h" +#include "DicRecord.h" + +DicCsvReader::DicCsvReader( Dictionary* aDict ): + m_dict( aDict ) + { + } + +DicCsvReader::DicCsvReader(): + m_dict( NULL ) + { + } + +QFile::FileError DicCsvReader::readDict( const QString aCsvFilePath, const CsvImportData& aImportData ) + { + if( !m_dict ) + return QFile::NoError; + + initData( aImportData ); + + // Erase dictionary content + m_dict->clearFieldPackConfig(); + if( m_dict->entriesNum() > 0 ) + m_dict->removeRecords( 0, m_dict->entriesNum() ); + + QFile file( aCsvFilePath ); + if( !file.open( QIODevice::ReadOnly | QFile::Text ) ) // \r\n --> \n + return file.error(); + QTextStream inStream( &file ); + inStream.setCodec( m_params.textCodec ); + + // Ignore first rows + int rowNum = 1; + while( !inStream.atEnd() && rowNum++ < m_params.fromLine ) + inStream.readLine(); + + int fieldsNum = readLines( inStream ); + file.close(); + + // Add entries to dictionary + foreach( DicRecord* entry, m_entries ) + m_dict->addRecord(entry); + + // Name nameless fields and create dictionary fields + for( int i = 0; i < fieldsNum; i++ ) + { + QString name = m_fieldNames.value( i ); + if( name.isEmpty() ) + name = QString::number( i+1 ); + m_dict->addField( name ); + } + + // Create packs + CardPack* pack; + QList<const Field*> ansFields; + + pack = new CardPack( m_dict ); + pack->setQstField( m_dict->field(0) ); + ansFields << m_dict->field(1); + for( int i = 2; i < m_dict->fieldsNum(); i++ ) + ansFields << m_dict->field(i); + pack->setAnsFields( ansFields ); + m_dict->addCardPack( pack ); + + ansFields.clear(); + pack = new CardPack( m_dict ); + pack->setQstField( m_dict->field(1) ); + ansFields << m_dict->field(0); + for( int i = 2; i < m_dict->fieldsNum(); i++ ) + ansFields << m_dict->field(i); + pack->setAnsFields( ansFields ); + m_dict->addCardPack( pack ); + + return QFile::NoError; + } + +// The read field names are returned with fieldNames(). + +QList<DicRecord*> DicCsvReader::readEntries( QString aCsvEntries, const CsvImportData& aImportData ) + { + initData( aImportData ); + QTextStream inStream( &aCsvEntries ); + readLines( inStream ); + if( !m_fieldNames.empty() ) + return m_entries; + else + return QList<DicRecord*>(); /* If no required header, they are not real entries. + * When pasting, this text cannot be parsed and must be ignored. */ + } + +void DicCsvReader::initData( const CsvImportData& aImportData ) + { + m_params = aImportData; + // Construct regexp for the separators + QString fieldSepRxStr; + switch( m_params.fieldSeparationMode ) + { + case EFieldSeparatorAnyCharacter: + fieldSepRxStr = QString("[") + m_params.fieldSeparators + "]"; + break; + case EFieldSeparatorAnyCombination: + fieldSepRxStr = QString("[") + m_params.fieldSeparators + "]+"; + break; + case EFieldSeparatorExactString: + fieldSepRxStr = m_params.fieldSeparators; + break; + } + m_fieldSepRx = QRegExp( fieldSepRxStr ); + QChar delim = m_params.textDelimiter; + m_chunkEndRx = QRegExp( QString("(") + fieldSepRxStr + "|" + delim + ")" ); // End of text chunk + m_singleDelimiterRx = QRegExp( QString("([^") + delim + "]|^)" + delim + "([^" + delim + "]|$)" ); // Single text delimiter + } + +int DicCsvReader::readLines( QTextStream& aInStream ) + { + QString line; + + // Read field names from the header + m_fieldNames.clear(); + if( m_params.firstLineIsHeader && !aInStream.atEnd() ) + { + line = aInStream.readLine(); + if( line.startsWith( m_params.commentChar ) ) + line.remove( m_params.commentChar ); + m_fieldNames = readFields( line.trimmed() ); + } + + // Read lines and create entries + int fieldsNum = 0; + m_entries.clear(); + while( !aInStream.atEnd() ) + { + line = aInStream.readLine(); + QStringList fields = readFields( line.trimmed() ); + DicRecord* entry = new DicRecord(); + for( int i = 0; i < fields.size(); i++ ) + { + QString name = m_fieldNames.value( i, QString::number( i+1 ) ); + QString field = fields.value( i ); + entry->setField( name, field ); + } + m_entries << entry; // Add even empty entries (without fields) + if( fields.size() > fieldsNum ) + fieldsNum = fields.size(); + } + return fieldsNum; + } + +QStringList DicCsvReader::readFields( const QString aCsvLine ) + { + QChar comment = m_params.commentChar; + if( !comment.isNull() && aCsvLine.startsWith( comment ) ) + return QStringList(); // FUTURE FEATURE: read and mark as disabled + + QChar delim = m_params.textDelimiter; + QStringList fields; + int curPos = 0; + QString curText; + while( curPos < aCsvLine.length() ) + { + QChar curChar = aCsvLine[curPos]; + if( curChar == delim ) // Text delimiter - Process the text until the next delimiter + { + int quoteEnd = aCsvLine.indexOf( m_singleDelimiterRx, curPos + 1 ); + if( quoteEnd == -1) // End of line + quoteEnd = aCsvLine.length(); + curText += aCsvLine.mid( curPos+1, quoteEnd-curPos ); + curPos = quoteEnd + 2; // move beyond the delimiter + } + else if( m_fieldSepRx.indexIn( aCsvLine, curPos ) == curPos ) // Field separator - End of field + { + int sepLength = m_fieldSepRx.matchedLength(); + curPos += sepLength; + fields << unescapeString( curText.trimmed() ); + curText.clear(); + } + else // Chunk of normal text. Process until next field separator or text delimiter. + { + int chunkEnd = aCsvLine.indexOf( m_chunkEndRx, curPos ); + if( chunkEnd == -1 ) // End of line + chunkEnd = aCsvLine.length(); + curText += aCsvLine.mid( curPos, chunkEnd-curPos ); + curPos = chunkEnd; + } + } + if( !curText.isEmpty() ) // last Field + fields << unescapeString( curText.trimmed() ); + + if( m_params.colsToImport > 0 ) // Take only needed imported fields + fields = fields.mid( 0, m_params.colsToImport ); + + return fields; +} + +/** + * Replaces double delimiters with one delimiter + */ +QString DicCsvReader::unescapeString( QString aString ) +{ + QString delim = m_params.textDelimiter; + aString = aString.replace( delim + delim, delim ); + return aString; +} |