summaryrefslogtreecommitdiff
path: root/src/dictionary/DicCsvReader.cpp
blob: ec802253eff9e0051c581dc3c614dea17302a7dd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#include "DicCsvReader.h"
#include "Dictionary.h"
#include "CardPack.h"
#include "DicRecord.h"

/**
  * Creates a reader bound to a dictionary.
  * The pointer is stored as given; readDict() fills that dictionary.
  *
  * @param aDict Dictionary that readDict() will populate.
  */
DicCsvReader::DicCsvReader( Dictionary* aDict )
    : m_dict( aDict )
    {
    }

/**
  * Creates a reader that is not bound to any dictionary.
  * With no dictionary set, readDict() returns immediately without reading anything.
  */
DicCsvReader::DicCsvReader()
    : m_dict( NULL )
    {
    }

/**
  * Replaces the content of the bound dictionary with the records of a CSV file.
  *
  * The file is opened before the dictionary is modified, so when the file
  * cannot be opened the existing dictionary content is left untouched.
  * After reading, one dictionary field is created per CSV column and two card
  * packs are added: field 0 asking for field 1, and field 1 asking for field 0.
  * All remaining fields are appended as additional answers in both packs.
  *
  * @param aCsvFilePath Path of the CSV file to read.
  * @param aImportData Import parameters (codec, first line to read, separators, ...).
  * @return The QFile error when the file cannot be opened. QFile::NoError otherwise,
  *         including the case when no dictionary is bound to this reader.
  */
QFile::FileError DicCsvReader::readDict( const QString aCsvFilePath, const CsvImportData& aImportData )
    {
    if( !m_dict )
        return QFile::NoError;

    initData( aImportData );

    // Open the source first: a failed open must not wipe the dictionary.
    QFile file( aCsvFilePath );
    if( !file.open( QIODevice::ReadOnly | QFile::Text ) ) // \r\n --> \n
        return file.error();

    // Erase dictionary content
    m_dict->clearFieldPackConfig();
    if( m_dict->entriesNum() > 0 )
        m_dict->removeRecords( 0, m_dict->entriesNum() );

    QTextStream inStream( &file );
    inStream.setCodec( m_params.textCodec );

    // Ignore first rows: reading starts at the 1-based line m_params.fromLine
    int rowNum = 1;
    while( !inStream.atEnd() && rowNum++ < m_params.fromLine )
        inStream.readLine();

    int fieldsNum = readLines( inStream );
    file.close();

    // Add entries to dictionary
    foreach( DicRecord* entry, m_entries )
        m_dict->addRecord(entry);

    // Name nameless fields and create dictionary fields
    for( int i = 0; i < fieldsNum; i++ )
        {
        QString name = m_fieldNames.value( i );
        if( name.isEmpty() )
            name = QString::number( i+1 );  // Nameless columns are named by their 1-based number
        m_dict->addField( name );
        }

    // Create packs: the first two fields question each other in turn.
    // NOTE(review): with fewer than two columns, field(1) is requested although it
    // was never added; what Dictionary::field() does then is not visible here - confirm.
    for( int qstIdx = 0; qstIdx < 2; qstIdx++ )
        {
        CardPack* pack = new CardPack( m_dict );
        pack->setQstField( m_dict->field( qstIdx ) );

        QList<const Field*> ansFields;
        ansFields << m_dict->field( 1 - qstIdx );
        for( int i = 2; i < m_dict->fieldsNum(); i++ )
            ansFields << m_dict->field(i);
        pack->setAnsFields( ansFields );

        m_dict->addCardPack( pack );
        }

    return QFile::NoError;
    }

/**
  * Parses CSV text held in a string and returns the records read from it.
  * The field names read from the header are returned with fieldNames().
  *
  * @param aCsvEntries CSV text to parse.
  * @param aImportData Import parameters (header flag, separators, delimiter, ...).
  * @return The parsed records, or an empty list when no field names were read.
  */
QList<DicRecord*> DicCsvReader::readEntries( QString aCsvEntries, const CsvImportData& aImportData )
    {
    initData( aImportData );

    QTextStream csvStream( &aCsvEntries );
    readLines( csvStream );

    /* If no required header, they are not real entries.
     * When pasting, this text cannot be parsed and must be ignored. */
    // NOTE(review): the records created by readLines() stay in m_entries in this
    // case and are not deleted here - confirm whether they leak.
    if( m_fieldNames.isEmpty() )
        return QList<DicRecord*>();

    return m_entries;
    }

void DicCsvReader::initData( const CsvImportData& aImportData )
    {
    m_params = aImportData;
    // Construct regexp for the separators
    QString fieldSepRxStr;
    switch( m_params.fieldSeparationMode )
        {
        case EFieldSeparatorAnyCharacter:
            fieldSepRxStr = QString("[") + m_params.fieldSeparators + "]";
            break;
        case EFieldSeparatorAnyCombination:
            fieldSepRxStr = QString("[") + m_params.fieldSeparators + "]+";
            break;
        case EFieldSeparatorExactString:
            fieldSepRxStr = m_params.fieldSeparators;
            break;
        }
    m_fieldSepRx = QRegExp( fieldSepRxStr );
    QChar delim = m_params.textDelimiter;
    m_chunkEndRx = QRegExp( QString("(") + fieldSepRxStr + "|" + delim + ")" );   // End of text chunk
    m_singleDelimiterRx = QRegExp( QString("([^") + delim + "]|^)" + delim + "([^" + delim + "]|$)" );   // Single text delimiter
    }

/**
  * Reads the optional header and all remaining lines of the stream.
  *
  * The field names from the header are stored in m_fieldNames, and one record per
  * line is stored in m_entries. Both members are cleared first. Records are
  * created with new and are not deleted by this class here.
  *
  * @param aInStream Stream positioned at the header line (or at the first data
  *                  line when there is no header).
  * @return The largest number of fields found in any data line.
  */
int DicCsvReader::readLines( QTextStream& aInStream )
    {
    QString line;

    // Read field names from the header
    m_fieldNames.clear();
    if( m_params.firstLineIsHeader && !aInStream.atEnd() )
        {
        line = aInStream.readLine();
        // The header may be commented out. Strip only the leading comment marker(s):
        // removing the character everywhere would also damage the field names.
        while( line.startsWith( m_params.commentChar ) )
            line.remove( 0, 1 );
        m_fieldNames = readFields( line.trimmed() );
        }

    // Read lines and create entries
    int fieldsNum = 0;
    m_entries.clear();
    while( !aInStream.atEnd() )
        {
        line = aInStream.readLine();
        QStringList fields = readFields( line.trimmed() );
        DicRecord* entry = new DicRecord();
        for( int i = 0; i < fields.size(); i++ )
            {
            // A column with a missing or empty header name is named by its 1-based
            // number, the same way readDict() names the dictionary fields.
            QString name = m_fieldNames.value( i );
            if( name.isEmpty() )
                name = QString::number( i+1 );
            entry->setField( name, fields.value( i ) );
            }
        m_entries << entry;  // Add even empty entries (without fields)
        if( fields.size() > fieldsNum )
            fieldsNum = fields.size();
        }
    return fieldsNum;
    }

/**
  * Splits one CSV line into fields.
  *
  * The line is scanned from left to right. At each position it is one of:
  *  - a text delimiter: the text up to the next single (not doubled) delimiter
  *    is appended to the current field, separators included;
  *  - a field separator: the current field is finished;
  *  - plain text: appended to the current field up to the next separator or delimiter.
  * Each finished field is trimmed and its doubled delimiters are unescaped.
  *
  * A line ending with a separator yields no trailing empty field, because the
  * last field is added only when some text was accumulated.
  *
  * NOTE(review): an empty quoted text (two delimiters in a row, e.g. a,"",b) does
  * not seem to be matched by m_singleDelimiterRx when it is searched past index 0,
  * so the rest of the line may end up in that field - confirm.
  *
  * @param aCsvLine One line of CSV text.
  * @return The fields of the line, limited to m_params.colsToImport when that is
  *         positive. Empty list for a line starting with the comment character.
  */
QStringList DicCsvReader::readFields( const QString aCsvLine )
    {
    QChar comment = m_params.commentChar;
    if( !comment.isNull() && aCsvLine.startsWith( comment ) )
        return QStringList();   // FUTURE FEATURE: read and mark as disabled

    QChar delim = m_params.textDelimiter;
    QStringList fields;
    int curPos = 0;
    QString curText;
    while( curPos < aCsvLine.length() )
        {
        QChar curChar = aCsvLine[curPos];
        if( curChar == delim )  // Text delimiter - Process the text until the next delimiter
            {
            // The match starts one character before the closing delimiter,
            // i.e. at the last character of the quoted text.
            int quoteEnd = aCsvLine.indexOf( m_singleDelimiterRx, curPos + 1 );
            if( quoteEnd == -1)  // End of line
                quoteEnd = aCsvLine.length();
            curText += aCsvLine.mid( curPos+1, quoteEnd-curPos );
            curPos = quoteEnd + 2;  // move beyond the delimiter
            }
        else if( m_fieldSepRx.indexIn( aCsvLine, curPos ) == curPos
                 && m_fieldSepRx.matchedLength() > 0 )   // Field separator - End of field
            {
            // A zero-length match (empty or degenerate separator pattern) is not
            // accepted as a separator: it would never advance curPos.
            curPos += m_fieldSepRx.matchedLength();
            fields << unescapeString( curText.trimmed() );
            curText.clear();
            }
        else    // Chunk of normal text. Process until next field separator or text delimiter.
            {
            int chunkEnd = aCsvLine.indexOf( m_chunkEndRx, curPos );
            if( chunkEnd == -1 )  // End of line
                chunkEnd = aCsvLine.length();
            else if( chunkEnd <= curPos )  // Zero-length match: consume one character to guarantee progress
                chunkEnd = curPos + 1;
            curText += aCsvLine.mid( curPos, chunkEnd-curPos );
            curPos = chunkEnd;
            }
        }
    if( !curText.isEmpty() )   // last Field
        fields << unescapeString( curText.trimmed() );

    if( m_params.colsToImport > 0 ) // Take only needed imported fields
        fields = fields.mid( 0, m_params.colsToImport );

    return fields;
    }

/**
  * Unescapes the text delimiters of a field: every doubled text delimiter
  * is replaced with a single one.
  *
  * @param aString Field text, possibly containing doubled delimiters.
  * @return The text with doubled delimiters collapsed.
  */
QString DicCsvReader::unescapeString( QString aString )
{
    const QString single = m_params.textDelimiter;
    const QString doubled = single + single;
    aString.replace( doubled, single );
    return aString;
}