objstr.c 50.6 KB
Newer Older
xbe's avatar
xbe committed
1
#include <stdbool.h>
2
3
4
5
6
7
#include <string.h>
#include <assert.h>

#include "nlr.h"
#include "misc.h"
#include "mpconfig.h"
8
#include "qstr.h"
9
10
11
#include "obj.h"
#include "runtime0.h"
#include "runtime.h"
Dave Hylands's avatar
Dave Hylands committed
12
#include "pfenv.h"
13
14
15

typedef struct _mp_obj_str_t {
    mp_obj_base_t base;
16
17
    machine_uint_t hash : 16; // XXX here we assume the hash size is 16 bits (it is at the moment; see qstr.c)
    machine_uint_t len : 16; // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte
18
    const byte *data;
19
20
} mp_obj_str_t;

21
STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t *args);
22
23
const mp_obj_t mp_const_empty_bytes;

24
25
26
27
28
29
30
31
32
// NOTE on all three GET_STR_* macros below:
//  - each one DECLARES its output variable(s) in the enclosing scope, so it
//    must be used where a declaration is allowed and the names must be unused;
//  - each one expands to several statements, so it must not be used as the
//    body of an unbraced if/else/loop;
//  - str_obj_in is evaluated more than once, so it must have no side effects.

// use this macro to extract the string hash
// (declares `uint str_hash`; taken from the qstr table for qstr values, or
// from the object's hash field otherwise)
#define GET_STR_HASH(str_obj_in, str_hash) \
    uint str_hash; \
    if (MP_OBJ_IS_QSTR(str_obj_in)) { \
        str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); \
    } else { \
        str_hash = ((mp_obj_str_t*)(str_obj_in))->hash; \
    }

// use this macro to extract the string length
// (declares `uint str_len`, the number of bytes in the string)
#define GET_STR_LEN(str_obj_in, str_len) \
    uint str_len; \
    if (MP_OBJ_IS_QSTR(str_obj_in)) { \
        str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); \
    } else { \
        str_len = ((mp_obj_str_t*)(str_obj_in))->len; \
    }

// use this macro to extract the string data and length
// (declares `const byte *str_data` and `uint str_len`)
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) \
    const byte *str_data; \
    uint str_len; \
    if (MP_OBJ_IS_QSTR(str_obj_in)) { \
        str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); \
    } else { \
        str_len = ((mp_obj_str_t*)(str_obj_in))->len; \
        str_data = ((mp_obj_str_t*)(str_obj_in))->data; \
    }

33
34
STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
35
STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len);
36
STATIC void bad_implicit_conversion(mp_obj_t self_in) __attribute__((noreturn));
xyb's avatar
xyb committed
37
38
39
40

/******************************************************************************/
/* str                                                                        */

41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// Print str_data[0..str_len) as a quoted, escaped literal through `print`.
//
// The surrounding quote is a single quote unless the data contains a single
// quote and no double quote, in which case a double quote is used.  Inside
// the quotes: the chosen quote character and backslash are backslash-escaped,
// bytes 32..126 are emitted as-is, newline becomes \n and everything else is
// emitted as \xNN.
//
// Note: `print` is called once per byte, so this is slow for long strings.
void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len) {
    const byte *end = str_data + str_len;

    // Find out which kinds of quote occur; stop scanning once both were seen.
    bool seen_single = false;
    bool seen_double = false;
    const byte *p = str_data;
    while (p < end && !(seen_single && seen_double)) {
        switch (*p) {
            case '\'':
                seen_single = true;
                break;
            case '"':
                seen_double = true;
                break;
            default:
                break;
        }
        p++;
    }

    int quote_char = (seen_single && !seen_double) ? '"' : '\'';

    print(env, "%c", quote_char);
    for (p = str_data; p < end; p++) {
        byte c = *p;
        if (c == quote_char) {
            print(env, "\\%c", quote_char);
            continue;
        }
        if (c == '\\') {
            print(env, "\\\\");
            continue;
        }
        if (c >= 32 && c <= 126) {
            // printable ASCII
            print(env, "%c", c);
            continue;
        }
        if (c == '\n') {
            print(env, "\\n");
            continue;
        }
        // TODO add more escape codes here if we want to match CPython
        print(env, "\\x%02x", c);
    }
    print(env, "%c", quote_char);
}

74
// Print handler for str and bytes objects.
//
// A str printed with PRINT_STR is emitted raw.  In every other case (any
// other print kind, or any bytes object) the value is emitted in quoted,
// escaped form via mp_str_print_quoted, with a leading "b" for bytes.
STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
    GET_STR_DATA_LEN(self_in, str_data, str_len);
    bool is_bytes = MP_OBJ_IS_TYPE(self_in, &mp_type_bytes);
    if (kind == PRINT_STR && !is_bytes) {
        // the '*' precision argument of a printf-style format is an int
        print(env, "%.*s", (int)str_len, str_data);
    } else {
        if (is_bytes) {
            print(env, "b");
        }
        mp_str_print_quoted(print, env, str_data, str_len);
    }
}

87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
// Constructor for str.
//
//  - 0 args: returns the empty qstr.
//  - 1 arg:  renders args[0] with PRINT_STR into a temporary vstr and builds
//            a new str from the result.
//  - 2 or 3 args: args[0] must be a bytes object (TypeError otherwise); the
//            new str object is pointed at the same data and given the same
//            hash as the bytes object.  The 2nd/3rd arguments are currently
//            not inspected.
//  - more:   raises TypeError.
//
// type_in and n_kw are not used.
STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
    switch (n_args) {
        case 0:
            return MP_OBJ_NEW_QSTR(MP_QSTR_);

        case 1:
        {
            vstr_t *vstr = vstr_new();
            mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, args[0], PRINT_STR);
            mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
            vstr_free(vstr);
            return s;
        }

        case 2:
        case 3:
        {
            // TODO: validate 2nd/3rd args
            if (!MP_OBJ_IS_TYPE(args[0], &mp_type_bytes)) {
                nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes expected"));
            }
            GET_STR_DATA_LEN(args[0], str_data, str_len);
            GET_STR_HASH(args[0], str_hash);
            // created with NULL data, then pointed at the bytes object's buffer
            mp_obj_str_t *o = str_new(&mp_type_str, NULL, str_len);
            o->data = str_data;
            o->hash = str_hash;
            return o;
        }

        default:
            nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "str takes at most 3 arguments"));
    }
}

121
122
123
124
125
126
127
128
129
130
131
// Constructor for bytes.
//
//  - 0 args: returns mp_const_empty_bytes.
//  - str first arg: requires 2 or 3 args in total; the new bytes object is
//    pointed at the same data and given the same hash as the str.  The extra
//    arguments are currently not inspected.
//  - small int first arg (and no other args): returns that many zero bytes;
//    a negative count raises ValueError.
//  - anything else (and no other args): the argument is iterated and each
//    item's small-int value is stored as one byte.
//
// Raises TypeError for an unsupported number of arguments.
// type_in and n_kw are not used.
STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
    if (n_args == 0) {
        return mp_const_empty_bytes;
    }

    if (MP_OBJ_IS_STR(args[0])) {
        if (n_args < 2 || n_args > 3) {
            goto wrong_args;
        }
        GET_STR_DATA_LEN(args[0], str_data, str_len);
        GET_STR_HASH(args[0], str_hash);
        // created with NULL data, then pointed at the str object's buffer
        mp_obj_str_t *o = str_new(&mp_type_bytes, NULL, str_len);
        o->data = str_data;
        o->hash = str_hash;
        return o;
    }

    if (n_args > 1) {
        goto wrong_args;
    }

    if (MP_OBJ_IS_SMALL_INT(args[0])) {
        machine_int_t count = MP_OBJ_SMALL_INT_VALUE(args[0]);
        if (count < 0) {
            // without this check the count would be converted to a huge unsigned length
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "negative count"));
        }
        uint len = count;
        byte *data;

        mp_obj_t o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
        memset(data, 0, len);
        return mp_obj_str_builder_end(o);
    }

    int len;
    byte *data;
    vstr_t *vstr = NULL;
    mp_obj_t o = NULL;
    // Try to create array of exact len if initializer len is known
    mp_obj_t len_in = mp_obj_len_maybe(args[0]);
    if (len_in == MP_OBJ_NULL) {
        // length unknown: collect the bytes in a growable vstr first
        len = -1;
        vstr = vstr_new();
    } else {
        len = MP_OBJ_SMALL_INT_VALUE(len_in);
        o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
    }

    mp_obj_t iterable = mp_getiter(args[0]);
    mp_obj_t item;
    while ((item = mp_iternext(iterable)) != MP_OBJ_NULL) {
        if (len == -1) {
            vstr_add_char(vstr, MP_OBJ_SMALL_INT_VALUE(item));
        } else {
            // NOTE(review): relies on the iterable yielding no more than the
            // reported length; there is no bounds check here
            *data++ = MP_OBJ_SMALL_INT_VALUE(item);
        }
    }

    if (len == -1) {
        vstr_shrink(vstr);
        // TODO: Optimize, borrow buffer from vstr
        len = vstr_len(vstr);
        o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
        memcpy(data, vstr_str(vstr), len);
        vstr_free(vstr);
    }

    return mp_obj_str_builder_end(o);

wrong_args:
    nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "wrong number of arguments"));
}

190
191
// like strstr but with specified length and allows \0 bytes
// TODO replace with something more efficient/standard
192
STATIC const byte *find_subbytes(const byte *haystack, machine_uint_t hlen, const byte *needle, machine_uint_t nlen, machine_int_t direction) {
193
    if (hlen >= nlen) {
194
195
196
197
198
199
200
201
202
203
204
205
        machine_uint_t str_index, str_index_end;
        if (direction > 0) {
            str_index = 0;
            str_index_end = hlen - nlen;
        } else {
            str_index = hlen - nlen;
            str_index_end = 0;
        }
        for (;;) {
            if (memcmp(&haystack[str_index], needle, nlen) == 0) {
                //found
                return haystack + str_index;
206
            }
207
208
209
            if (str_index == str_index_end) {
                //not found
                break;
210
            }
211
            str_index += direction;
212
213
214
215
216
        }
    }
    return NULL;
}

217
// Binary operator handler for str/bytes with lhs_in as the left operand.
//
// Handles subscription (int index, and slices when MICROPY_ENABLE_SLICE),
// concatenation, containment, repetition, %-formatting and the ordering
// comparisons.  Returns MP_OBJ_NULL when the operation / operand combination
// is not supported here; raises TypeError for a subscript that is neither a
// small int nor (when enabled) a slice.
STATIC mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
    GET_STR_DATA_LEN(lhs_in, lhs_data, lhs_len);
    switch (op) {
        case MP_BINARY_OP_SUBSCR:
            // TODO: need predicate to check for int-like type (bools are such for example)
            // ["no", "yes"][1 == 2] is common idiom
            if (MP_OBJ_IS_SMALL_INT(rhs_in)) {
                uint index = mp_get_index(mp_obj_get_type(lhs_in), lhs_len, rhs_in, false);
                if (MP_OBJ_IS_TYPE(lhs_in, &mp_type_bytes)) {
                    // indexing bytes yields the byte value as an int
                    return MP_OBJ_NEW_SMALL_INT((mp_small_int_t)lhs_data[index]);
                } else {
                    // indexing str yields a 1-byte str
                    return mp_obj_new_str(lhs_data + index, 1, true);
                }
#if MICROPY_ENABLE_SLICE
            } else if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_slice)) {
                machine_uint_t start, stop;
                if (!m_seq_get_fast_slice_indexes(lhs_len, rhs_in, &start, &stop)) {
                    assert(0);
                }
                return mp_obj_new_str(lhs_data + start, stop - start, false);
#endif
            } else {
                // Message doesn't match CPython, but we don't have so much bytes as they
                // to spend them on verbose wording
                nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "index must be int"));
            }

        case MP_BINARY_OP_ADD:
        case MP_BINARY_OP_INPLACE_ADD:
            if (MP_OBJ_IS_STR(rhs_in)) {
                // add 2 strings

                GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
                int alloc_len = lhs_len + rhs_len;

                /* code for making qstr
                byte *q_ptr;
                byte *val = qstr_build_start(alloc_len, &q_ptr);
                memcpy(val, lhs_data, lhs_len);
                memcpy(val + lhs_len, rhs_data, rhs_len);
                return MP_OBJ_NEW_QSTR(qstr_build_end(q_ptr));
                */

                // code for non-qstr
                byte *data;
                mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), alloc_len, &data);
                memcpy(data, lhs_data, lhs_len);
                memcpy(data + lhs_len, rhs_data, rhs_len);
                return mp_obj_str_builder_end(s);
            }
            break;

        case MP_BINARY_OP_IN:
            /* NOTE `a in b` is `b.__contains__(a)` */
            if (MP_OBJ_IS_STR(rhs_in)) {
                GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
                return MP_BOOL(find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len, 1) != NULL);
            }
            break;

        case MP_BINARY_OP_MULTIPLY:
        {
            if (!MP_OBJ_IS_SMALL_INT(rhs_in)) {
                return MP_OBJ_NULL;
            }
            int n = MP_OBJ_SMALL_INT_VALUE(rhs_in);
            if (n < 0) {
                // a negative repeat count gives an empty result; without the
                // clamp lhs_len * n would turn into a huge unsigned length
                n = 0;
            }
            byte *data;
            mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), lhs_len * n, &data);
            mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, data);
            return mp_obj_str_builder_end(s);
        }

        case MP_BINARY_OP_MODULO: {
            mp_obj_t *args;
            uint n_args;
            if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_tuple)) {
                // TODO: Support tuple subclasses?
                mp_obj_tuple_get(rhs_in, &n_args, &args);
            } else {
                // a non-tuple right operand is treated as a single argument
                args = &rhs_in;
                n_args = 1;
            }
            return str_modulo_format(lhs_in, n_args, args);
        }

        // These 2 are never passed here, dealt with as a special case in mp_binary_op().
        //case MP_BINARY_OP_EQUAL:
        //case MP_BINARY_OP_NOT_EQUAL:
        case MP_BINARY_OP_LESS:
        case MP_BINARY_OP_LESS_EQUAL:
        case MP_BINARY_OP_MORE:
        case MP_BINARY_OP_MORE_EQUAL:
            if (MP_OBJ_IS_STR(rhs_in)) {
                GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
                return MP_BOOL(mp_seq_cmp_bytes(op, lhs_data, lhs_len, rhs_data, rhs_len));
            }
            break;
    }

    return MP_OBJ_NULL; // op not supported
}

318
// Implements sep.join(seq): concatenates the items of `arg`, inserting the
// separator self_in between consecutive items.
//
// `arg` must be a tuple or a list whose items are all strings; otherwise a
// TypeError is raised.  The result has the same type as self_in.
STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
    assert(MP_OBJ_IS_STR(self_in));

    // get separation string
    GET_STR_DATA_LEN(self_in, sep_str, sep_len);

    // process args
    uint seq_len;
    mp_obj_t *seq_items;
    if (MP_OBJ_IS_TYPE(arg, &mp_type_tuple)) {
        mp_obj_tuple_get(arg, &seq_len, &seq_items);
    } else if (MP_OBJ_IS_TYPE(arg, &mp_type_list)) {
        mp_obj_list_get(arg, &seq_len, &seq_items);
    } else {
        goto bad_arg;
    }

    // count required length (first pass also validates the item types)
    int required_len = 0;
    for (uint i = 0; i < seq_len; i++) {
        if (!MP_OBJ_IS_STR(seq_items[i])) {
            goto bad_arg;
        }
        if (i > 0) {
            required_len += sep_len;
        }
        GET_STR_LEN(seq_items[i], l);
        required_len += l;
    }

    // make joined string
    byte *data;
    mp_obj_t joined_str = mp_obj_str_builder_start(mp_obj_get_type(self_in), required_len, &data);
    for (uint i = 0; i < seq_len; i++) {
        if (i > 0) {
            memcpy(data, sep_str, sep_len);
            data += sep_len;
        }
        GET_STR_DATA_LEN(seq_items[i], s, l);
        memcpy(data, s, l);
        data += l;
    }

    // return joined string
    return mp_obj_str_builder_end(joined_str);

bad_arg:
    nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "?str.join expecting a list of str's"));
}

Paul Sokolovsky's avatar
Paul Sokolovsky committed
368
369
// True when c is a space or a tab (no other characters are treated as
// whitespace by this macro).  c is evaluated twice, so it must not have side
// effects.
#define is_ws(c) ((c) == ' ' || (c) == '\t')

370
// Implements str.split([sep[, maxsplit]]).
//
// args[0] is the string to split, args[1] the optional separator and args[2]
// the optional maximum number of splits (-1, the default, means no limit).
//
//  - sep omitted or None: runs of is_ws() characters separate the fields;
//    leading whitespace is skipped and no empty fields are produced.
//  - sep given: split on every occurrence of sep; empty fields are kept.
//    An empty sep raises ValueError.
//
// Returns a new list of str objects.
STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
    machine_int_t splits = -1;
    mp_obj_t sep = mp_const_none;
    if (n_args > 1) {
        sep = args[1];
        if (n_args > 2) {
            splits = mp_obj_get_int(args[2]);
        }
    }

    mp_obj_t res = mp_obj_new_list(0, NULL);
    GET_STR_DATA_LEN(args[0], s, len);
    const byte *top = s + len;

    if (sep == mp_const_none) {
        // sep not given, so separate on whitespace

        // Initial whitespace is not counted as split, so we pre-do it
        while (s < top && is_ws(*s)) {
            s++;
        }
        while (s < top && splits != 0) {
            const byte *start = s;
            while (s < top && !is_ws(*s)) {
                s++;
            }
            mp_obj_list_append(res, mp_obj_new_str(start, s - start, false));
            if (s >= top) {
                break;
            }
            while (s < top && is_ws(*s)) {
                s++;
            }
            if (splits > 0) {
                splits--;
            }
        }

        // split limit reached: the remainder becomes the last field
        if (s < top) {
            mp_obj_list_append(res, mp_obj_new_str(s, top - s, false));
        }

    } else {
        // sep given

        uint sep_len;
        const char *sep_str = mp_obj_str_get_data(sep, &sep_len);

        if (sep_len == 0) {
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
        }

        for (;;) {
            const byte *start = s;
            for (;;) {
                if (splits == 0 || s + sep_len > top) {
                    // no splits left, or sep can no longer fit: take the rest
                    s = top;
                    break;
                } else if (memcmp(s, sep_str, sep_len) == 0) {
                    break;
                }
                s++;
            }
            mp_obj_list_append(res, mp_obj_new_str(start, s - start, false));
            if (s >= top) {
                break;
            }
            s += sep_len;
            if (splits > 0) {
                splits--;
            }
        }
    }

    return res;
}

441
// Shared implementation of str.find / str.rfind.
//
// args[0] is the string to search, args[1] the substring to look for,
// args[2] / args[3] the optional start / end indices (None means "not given").
// direction is 1 to find the first occurrence, -1 to find the last one.
//
// Returns the index of the match relative to the start of args[0] as a small
// int, or -1 when there is no match.
STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t direction) {
    assert(2 <= n_args && n_args <= 4);
    assert(MP_OBJ_IS_STR(args[0]));
    assert(MP_OBJ_IS_STR(args[1]));

    GET_STR_DATA_LEN(args[0], haystack, haystack_len);
    GET_STR_DATA_LEN(args[1], needle, needle_len);

    machine_uint_t start = 0;
    machine_uint_t end = haystack_len;
    if (n_args >= 3 && args[2] != mp_const_none) {
        start = mp_get_index(&mp_type_str, haystack_len, args[2], true);
    }
    if (n_args >= 4 && args[3] != mp_const_none) {
        end = mp_get_index(&mp_type_str, haystack_len, args[3], true);
    }

    if (end < start) {
        // Inverted range: nothing can match.  This must be checked before
        // computing end - start, which is unsigned and would wrap around to a
        // huge length and make the search read past the end of the string.
        return MP_OBJ_NEW_SMALL_INT(-1);
    }

    const byte *p = find_subbytes(haystack + start, end - start, needle, needle_len, direction);
    if (p == NULL) {
        // not found
        return MP_OBJ_NEW_SMALL_INT(-1);
    } else {
        // found
        return MP_OBJ_NEW_SMALL_INT(p - haystack);
    }
}

468
469
470
471
472
473
474
475
// str.find: delegates to str_finder with a direction of 1.
STATIC mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
    const machine_int_t forward = 1;
    return str_finder(n_args, args, forward);
}

// str.rfind: delegates to str_finder with a direction of -1.
STATIC mp_obj_t str_rfind(uint n_args, const mp_obj_t *args) {
    const machine_int_t backward = -1;
    return str_finder(n_args, args, backward);
}

476
// TODO: (Much) more variety in args
477
STATIC mp_obj_t str_startswith(mp_obj_t self_in, mp_obj_t arg) {
478
479
480
481
482
483
484
485
    GET_STR_DATA_LEN(self_in, str, str_len);
    GET_STR_DATA_LEN(arg, prefix, prefix_len);
    if (prefix_len > str_len) {
        return mp_const_false;
    }
    return MP_BOOL(memcmp(str, prefix, prefix_len) == 0);
}

486
// Implements str.strip([chars]).
//
// args[0] is the string to strip.  args[1], when given, is a string whose
// bytes form the set of characters to remove; otherwise the set is
// " \t\n\r\v\f".  Characters from the set are removed from both ends only.
//
// Returns the empty qstr when every character is in the set (or the string
// is empty), otherwise a new str holding the remaining middle part.
STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
    assert(1 <= n_args && n_args <= 2);
    assert(MP_OBJ_IS_STR(args[0]));

    const byte *chars_to_del;
    uint chars_to_del_len;
    static const byte whitespace[] = " \t\n\r\v\f";

    if (n_args == 1) {
        chars_to_del = whitespace;
        // -1: the array's terminating NUL is not a whitespace character and
        // must not be part of the set
        chars_to_del_len = sizeof(whitespace) - 1;
    } else {
        assert(MP_OBJ_IS_STR(args[1]));
        GET_STR_DATA_LEN(args[1], s, l);
        chars_to_del = s;
        chars_to_del_len = l;
    }

    GET_STR_DATA_LEN(args[0], orig_str, orig_str_len);

    // Record the positions of the first and last characters that are kept.
    machine_uint_t first_good_char_pos = 0;
    bool first_good_char_pos_set = false;
    machine_uint_t last_good_char_pos = 0;
    for (machine_uint_t i = 0; i < orig_str_len; i++) {
        if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) {
            last_good_char_pos = i;
            if (!first_good_char_pos_set) {
                first_good_char_pos = i;
                first_good_char_pos_set = true;
            }
        }
    }

    // Use the flag rather than comparing both positions with 0: positions
    // 0/0 also occur when the only kept character is at index 0 (e.g. "a ").
    if (!first_good_char_pos_set) {
        // string is all whitespace, return ''
        return MP_OBJ_NEW_QSTR(MP_QSTR_);
    }

    assert(last_good_char_pos >= first_good_char_pos);
    //+1 to accommodate the last character
    machine_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
    return mp_obj_new_str(orig_str + first_good_char_pos, stripped_len, false);
}

Dave Hylands's avatar
Dave Hylands committed
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
// Parse a run of decimal digits at the start of str.
//
// Only unsigned numbers are recognised (no sign, no leading whitespace).
// *num is written only when at least one digit was parsed; otherwise it is
// left untouched.  Returns the number of characters consumed (0 if none).
static int str_to_int(const char *str, int *num) {
    const char *p = str;
    if (!unichar_isdigit(*p)) {
        // no digits: leave *num alone
        return p - str;
    }

    *num = 0;
    do {
        int digit = *p - '0';
        *num = *num * 10 + digit;
        p++;
    } while (unichar_isdigit(*p));

    return p - str;
}

// True when ch is one of the format-spec alignment characters < > = ^.
// The NUL character is never an alignment character.
static bool isalignment(char ch) {
    switch (ch) {
        case '<':
        case '>':
        case '=':
        case '^':
            return true;
        default:
            return false;
    }
}

// True when ch is one of the format-spec presentation type characters
// b c d e E f F g G n o s x X %.  The NUL character is never a type.
static bool istype(char ch) {
    switch (ch) {
        case 'b':
        case 'c':
        case 'd':
        case 'e':
        case 'E':
        case 'f':
        case 'F':
        case 'g':
        case 'G':
        case 'n':
        case 'o':
        case 's':
        case 'x':
        case 'X':
        case '%':
            return true;
        default:
            return false;
    }
}

// True when arg is a bool object or satisfies MP_OBJ_IS_INT.
static bool arg_looks_integer(mp_obj_t arg) {
    if (MP_OBJ_IS_TYPE(arg, &mp_type_bool)) {
        return true;
    }
    if (MP_OBJ_IS_INT(arg)) {
        return true;
    }
    return false;
}

// True when arg looks like an integer (see arg_looks_integer) or, when
// floats are compiled in, is a float object.
static bool arg_looks_numeric(mp_obj_t arg) {
    if (arg_looks_integer(arg)) {
        return true;
    }
#if MICROPY_ENABLE_FLOAT
    if (MP_OBJ_IS_TYPE(arg, &mp_type_float)) {
        return true;
    }
#endif
    return false;
}

565
566
567
568
569
570
571
572
573
// Convert arg to a machine integer.  When floats are compiled in, a float
// object is converted by assigning its value to a machine_int_t; every other
// object goes through mp_obj_get_int.
static machine_int_t arg_as_int(mp_obj_t arg) {
#if MICROPY_ENABLE_FLOAT
    const bool is_float = MP_OBJ_IS_TYPE(arg, &mp_type_float);
    if (is_float) {
        machine_int_t from_float = mp_obj_get_float(arg);
        return from_float;
    }
#endif
    machine_int_t from_int = mp_obj_get_int(arg);
    return from_int;
}

574
mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
575
    assert(MP_OBJ_IS_STR(args[0]));
576

577
    GET_STR_DATA_LEN(args[0], str, len);
Dave Hylands's avatar
Dave Hylands committed
578
    int arg_i = 0;
579
    vstr_t *vstr = vstr_new();
Dave Hylands's avatar
Dave Hylands committed
580
581
582
583
    pfenv_t pfenv_vstr;
    pfenv_vstr.data = vstr;
    pfenv_vstr.print_strn = pfenv_vstr_add_strn;

584
    for (const byte *top = str + len; str < top; str++) {
Dave Hylands's avatar
Dave Hylands committed
585
586
587
588
589
590
        if (*str == '}') {
            str++;
            if (str < top && *str == '}') {
                vstr_add_char(vstr, '}');
                continue;
            }
591
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Single '}' encountered in format string"));
Dave Hylands's avatar
Dave Hylands committed
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
        }
        if (*str != '{') {
            vstr_add_char(vstr, *str);
            continue;
        }

        str++;
        if (str < top && *str == '{') {
            vstr_add_char(vstr, '{');
            continue;
        }

        // replacement_field ::=  "{" [field_name] ["!" conversion] [":" format_spec] "}"

        vstr_t *field_name = NULL;
        char conversion = '\0';
        vstr_t *format_spec = NULL;

        if (str < top && *str != '}' && *str != '!' && *str != ':') {
            field_name = vstr_new();
            while (str < top && *str != '}' && *str != '!' && *str != ':') {
                vstr_add_char(field_name, *str++);
            }
            vstr_add_char(field_name, '\0');
        }

        // conversion ::=  "r" | "s"

        if (str < top && *str == '!') {
621
            str++;
Dave Hylands's avatar
Dave Hylands committed
622
623
            if (str < top && (*str == 'r' || *str == 's')) {
                conversion = *str++;
624
            } else {
625
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "end of format while looking for conversion specifier"));
Dave Hylands's avatar
Dave Hylands committed
626
627
628
629
630
631
632
633
634
635
636
637
638
639
            }
        }

        if (str < top && *str == ':') {
            str++;
            // {:} is the same as {}, which is the same as {!s}
            // This makes a difference when passing in a True or False
            // '{}'.format(True) returns 'True'
            // '{:d}'.format(True) returns '1'
            // So we treat {:} as {} and this later gets treated to be {!s}
            if (*str != '}') {
                format_spec = vstr_new(); 
                while (str < top && *str != '}') {
                    vstr_add_char(format_spec, *str++);
640
                }
Dave Hylands's avatar
Dave Hylands committed
641
642
643
644
                vstr_add_char(format_spec, '\0');
            }
        }
        if (str >= top) {
645
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "unmatched '{' in format"));
Dave Hylands's avatar
Dave Hylands committed
646
647
        }
        if (*str != '}') {
648
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "expected ':' after format specifier"));
Dave Hylands's avatar
Dave Hylands committed
649
650
651
652
653
654
        }

        mp_obj_t arg = mp_const_none;

        if (field_name) {
            if (arg_i > 0) {
655
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "cannot switch from automatic field numbering to manual field specification"));
Dave Hylands's avatar
Dave Hylands committed
656
657
658
            }
            int index;
            if (str_to_int(vstr_str(field_name), &index) != vstr_len(field_name) - 1) {
659
                nlr_raise(mp_obj_new_exception_msg(&mp_type_KeyError, "attributes not supported yet"));
660
            }
Dave Hylands's avatar
Dave Hylands committed
661
            if (index >= n_args - 1) {
662
                nlr_raise(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
Dave Hylands's avatar
Dave Hylands committed
663
664
665
666
667
            }
            arg = args[index + 1];
            arg_i = -1;
            vstr_free(field_name);
            field_name = NULL;
668
        } else {
Dave Hylands's avatar
Dave Hylands committed
669
            if (arg_i < 0) {
670
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "cannot switch from manual field specification to automatic field numbering"));
Dave Hylands's avatar
Dave Hylands committed
671
672
            }
            if (arg_i >= n_args - 1) {
673
                nlr_raise(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
Dave Hylands's avatar
Dave Hylands committed
674
675
676
677
678
679
680
681
682
683
684
685
686
687
            }
            arg = args[arg_i + 1];
            arg_i++;
        }
        if (!format_spec && !conversion) {
            conversion = 's';
        }
        if (conversion) {
            mp_print_kind_t print_kind;
            if (conversion == 's') {
                print_kind = PRINT_STR;
            } else if (conversion == 'r') {
                print_kind = PRINT_REPR;
            } else {
688
                nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "Unknown conversion specifier %c", conversion));
Dave Hylands's avatar
Dave Hylands committed
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
            }
            vstr_t *arg_vstr = vstr_new();
            mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, arg_vstr, arg, print_kind);
            arg = mp_obj_new_str((const byte *)vstr_str(arg_vstr), vstr_len(arg_vstr), false);
            vstr_free(arg_vstr);
        }

        char sign = '\0';
        char fill = '\0';
        char align = '\0';
        int width = -1;
        int precision = -1;
        char type = '\0';
        int flags = 0;

        if (format_spec) {
            // The format specifier (from http://docs.python.org/2/library/string.html#formatspec)
            //
            // [[fill]align][sign][#][0][width][,][.precision][type]
            // fill        ::=  <any character>
            // align       ::=  "<" | ">" | "=" | "^"
            // sign        ::=  "+" | "-" | " "
            // width       ::=  integer
            // precision   ::=  integer
            // type        ::=  "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%"

            const char *s = vstr_str(format_spec);
            if (isalignment(*s)) {
                align = *s++;
            } else if (*s && isalignment(s[1])) {
                fill = *s++;
                align = *s++;
            }
            if (*s == '+' || *s == '-' || *s == ' ') {
                if (*s == '+') {
                    flags |= PF_FLAG_SHOW_SIGN;
                } else if (*s == ' ') {
                    flags |= PF_FLAG_SPACE_SIGN;
                }
                sign = *s++;
            }
            if (*s == '#') {
                flags |= PF_FLAG_SHOW_PREFIX;
                s++;
            }
            if (*s == '0') {
                if (!align) {
                    align = '=';
                }
                if (!fill) {
                    fill = '0';
                }
            }
            s += str_to_int(s, &width);
            if (*s == ',') {
                flags |= PF_FLAG_SHOW_COMMA;
                s++;
            }
            if (*s == '.') {
                s++;
                s += str_to_int(s, &precision);
            }
            if (istype(*s)) {
                type = *s++;
            }
            if (*s) {
755
                nlr_raise(mp_obj_new_exception_msg(&mp_type_KeyError, "Invalid conversion specification"));
Dave Hylands's avatar
Dave Hylands committed
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
            }
            vstr_free(format_spec);
            format_spec = NULL;
        }
        if (!align) {
            if (arg_looks_numeric(arg)) {
                align = '>';
            } else {
                align = '<';
            }
        }
        if (!fill) {
            fill = ' ';
        }

        if (sign) {
            if (type == 's') {
773
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed in string format specifier"));
Dave Hylands's avatar
Dave Hylands committed
774
775
            }
            if (type == 'c') {
776
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed with integer format specifier 'c'"));
Dave Hylands's avatar
Dave Hylands committed
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
            }
        } else {
            sign = '-';
        }

        switch (align) {
            case '<': flags |= PF_FLAG_LEFT_ADJUST;     break;
            case '=': flags |= PF_FLAG_PAD_AFTER_SIGN;  break;
            case '^': flags |= PF_FLAG_CENTER_ADJUST;   break;
        }

        if (arg_looks_integer(arg)) {
            switch (type) {
                case 'b':
                    pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 2, 'a', flags, fill, width);
                    continue;

                case 'c':
                {
                    char ch = mp_obj_get_int(arg);
                    pfenv_print_strn(&pfenv_vstr, &ch, 1, flags, fill, width);
                    continue;
                }

                case '\0':  // No explicit format type implies 'd'
                case 'n':   // I don't think we support locales in uPy so use 'd'
                case 'd':
                    pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 10, 'a', flags, fill, width);
                    continue;

                case 'o':
                    pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 8, 'a', flags, fill, width);
                    continue;

                case 'x':
                    pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 16, 'a', flags, fill, width);
                    continue;

                case 'X':
                    pfenv_print_int(&pfenv_vstr, mp_obj_get_int(arg), 1, 16, 'A', flags, fill, width);
                    continue;

                case 'e':
                case 'E':
                case 'f':
                case 'F':
                case 'g':
                case 'G':
                case '%':
                    // The floating point formatters all work with anything that
                    // looks like an integer
                    break;

                default:
831
                    nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
Dave Hylands's avatar
Dave Hylands committed
832
833
                        "Unknown format code '%c' for object of type '%s'", type, mp_obj_get_type_str(arg)));
            }
834
        }
835

836
837
        // NOTE: no else here. We need the e, f, g etc formats for integer
        //       arguments (from above if) to take this if.
838
        if (arg_looks_numeric(arg)) {
Dave Hylands's avatar
Dave Hylands committed
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
            if (!type) {

                // Even though the docs say that an unspecified type is the same
                // as 'g', there is one subtle difference, when the exponent
                // is one less than the precision.
                //  
                // '{:10.1}'.format(0.0) ==> '0e+00'
                // '{:10.1g}'.format(0.0) ==> '0'
                //
                // TODO: Figure out how to deal with this.
                //
                // A proper solution would involve adding a special flag
                // or something to format_float, and create a format_double
                // to deal with doubles. In order to fix this when using
                // sprintf, we'd need to use the e format and tweak the
                // returned result to strip trailing zeros like the g format
                // does.
                //
                // {:10.3} and {:10.2e} with 1.23e2 both produce 1.23e+02
                // but with 1.e2 you get 1e+02 and 1.00e+02
                //
                // Stripping the trailing 0's (like g) does would make the
                // e format give us the right format.
                //
                // CPython sources say:
                //   Omitted type specifier.  Behaves in the same way as repr(x)
                //   and str(x) if no precision is given, else like 'g', but with
                //   at least one digit after the decimal point. */

                type = 'g';
            }
            if (type == 'n') {
                type = 'g';
            }

            flags |= PF_FLAG_PAD_NAN_INF; // '{:06e}'.format(float('-inf')) should give '-00inf'
            switch (type) {
876
#if MICROPY_ENABLE_FLOAT
Dave Hylands's avatar
Dave Hylands committed
877
878
879
880
881
882
883
884
885
886
887
888
889
                case 'e':
                case 'E':
                case 'f':
                case 'F':
                case 'g':
                case 'G':
                    pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg), type, flags, fill, width, precision); 
                    break;

                case '%':
                    flags |= PF_FLAG_ADD_PERCENT;
                    pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg) * 100.0F, 'f', flags, fill, width, precision);
                    break;
890
#endif
Dave Hylands's avatar
Dave Hylands committed
891
892

                default:
893
                    nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
Dave Hylands's avatar
Dave Hylands committed
894
895
896
897
                        "Unknown format code '%c' for object of type 'float'",
                        type, mp_obj_get_type_str(arg)));
            }
        } else {
898
899
            // arg doesn't look like a number

Dave Hylands's avatar
Dave Hylands committed
900
            if (align == '=') {
901
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "'=' alignment not allowed in string format specifier"));
Dave Hylands's avatar
Dave Hylands committed
902
            }
903

Dave Hylands's avatar
Dave Hylands committed
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
            switch (type) {
                case '\0':
                    mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, arg, PRINT_STR);
                    break;

                case 's':
                {
                    uint len;
                    const char *s = mp_obj_str_get_data(arg, &len);
                    if (precision < 0) {
                        precision = len;
                    }
                    if (len > precision) {
                        len = precision;
                    }
                    pfenv_print_strn(&pfenv_vstr, s, len, flags, fill, width);
                    break;
                }

                default:
924
                    nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
Dave Hylands's avatar
Dave Hylands committed
925
926
927
                        "Unknown format code '%c' for object of type 'str'",
                        type, mp_obj_get_type_str(arg)));
            }
928
929
930
        }
    }

931
932
933
    mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
    vstr_free(vstr);
    return s;
934
935
}

936
937
938
939
STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t *args) {
    assert(MP_OBJ_IS_STR(pattern));

    GET_STR_DATA_LEN(pattern, str, len);
Dave Hylands's avatar
Dave Hylands committed
940
    const byte *start_str = str;
941
942
    int arg_i = 0;
    vstr_t *vstr = vstr_new();
Dave Hylands's avatar
Dave Hylands committed
943
944
945
946
    pfenv_t pfenv_vstr;
    pfenv_vstr.data = vstr;
    pfenv_vstr.print_strn = pfenv_vstr_add_strn;

947
    for (const byte *top = str + len; str < top; str++) {
Dave Hylands's avatar
Dave Hylands committed
948
949
950
951
952
953
954
        if (*str != '%') {
            vstr_add_char(vstr, *str);
            continue;
        }
        if (++str >= top) {
            break;
        }
955
        if (*str == '%') {
Dave Hylands's avatar
Dave Hylands committed
956
957
958
959
            vstr_add_char(vstr, '%');
            continue;
        }
        if (arg_i >= n_args) {
960
            nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "not enough arguments for format string"));
Dave Hylands's avatar
Dave Hylands committed
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
        }
        int flags = 0;
        char fill = ' ';
        bool alt = false;
        while (str < top) {
            if (*str == '-')      flags |= PF_FLAG_LEFT_ADJUST;
            else if (*str == '+') flags |= PF_FLAG_SHOW_SIGN;
            else if (*str == ' ') flags |= PF_FLAG_SPACE_SIGN;
            else if (*str == '#') alt = true;
            else if (*str == '0') {
                flags |= PF_FLAG_PAD_AFTER_SIGN;
                fill = '0';
            } else break;
            str++;
        }
        // parse width, if it exists
        int width = 0; 
        if (str < top) {
            if (*str == '*') {
                width = mp_obj_get_int(args[arg_i++]);
                str++;
982
            } else {
Dave Hylands's avatar
Dave Hylands committed
983
984
                for (; str < top && '0' <= *str && *str <= '9'; str++) {
                    width = width * 10 + *str - '0';
985
                }
Dave Hylands's avatar
Dave Hylands committed
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
            }
        }
        int prec = -1;
        if (str < top && *str == '.') {
            if (++str < top) {
                if (*str == '*') {
                    prec = mp_obj_get_int(args[arg_i++]);
                    str++;
                } else {
                    prec = 0;
                    for (; str < top && '0' <= *str && *str <= '9'; str++) {
                        prec = prec * 10 + *str - '0';
                    }
                }
            }