objstr.c 55.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

xbe's avatar
xbe committed
27
#include <stdbool.h>
28
29
30
#include <string.h>
#include <assert.h>

31
#include "mpconfig.h"
32
33
#include "nlr.h"
#include "misc.h"
34
#include "qstr.h"
35
36
37
#include "obj.h"
#include "runtime0.h"
#include "runtime.h"
Dave Hylands's avatar
Dave Hylands committed
38
#include "pfenv.h"
39
#include "objstr.h"
40

41
STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t *args);
42
43
const mp_obj_t mp_const_empty_bytes;

44
45
46
47
48
49
50
51
52
// Extract the string hash.
// Declares `uint str_hash` in the enclosing scope and assigns it: via
// qstr_hash() when str_obj_in is a qstr, otherwise from the `hash` field of
// the mp_obj_str_t.  The expansion is a declaration followed by an if/else
// statement, so it must be used as a statement inside a block (not as the
// unbraced body of an if/for), and str_obj_in is evaluated more than once.
#define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; }

// Extract the string length.
// Declares `uint str_len` in the enclosing scope and assigns it: via
// qstr_len() when str_obj_in is a qstr, otherwise from the `len` field of
// the mp_obj_str_t.  Same usage restrictions as GET_STR_HASH.
#define GET_STR_LEN(str_obj_in, str_len) uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; }

// Extract the string data pointer and length.
// Declares `const byte *str_data` and `uint str_len` in the enclosing scope
// and fills both: via qstr_data() when str_obj_in is a qstr, otherwise from
// the `data` and `len` fields of the mp_obj_str_t.  Same usage restrictions
// as GET_STR_HASH.
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; }

53
54
STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
55
STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len);
56
STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in);
xyb's avatar
xyb committed
57
58
59
60

/******************************************************************************/
/* str                                                                        */

61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
// Print str_data[0 .. str_len) as a quoted literal through the given print
// callback.  Single quotes are used unless the data contains a single quote
// and no double quote, in which case double quotes are used.  The chosen
// quote character and backslash are backslash-escaped; printable ASCII is
// emitted verbatim; newline, carriage return and tab use their short escapes;
// every other byte is written as \xNN.
// Note: the callback is invoked once per byte, so this is slow for long data.
void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len) {
    const byte *const end = str_data + str_len;
    const byte *p;

    // look for quote characters, stopping early once both kinds were seen
    bool seen_single = false;
    bool seen_double = false;
    p = str_data;
    while (p < end && !(seen_single && seen_double)) {
        switch (*p) {
            case '\'':
                seen_single = true;
                break;
            case '"':
                seen_double = true;
                break;
            default:
                break;
        }
        p++;
    }

    int quote_char = (seen_single && !seen_double) ? '"' : '\'';

    print(env, "%c", quote_char);
    for (p = str_data; p < end; p++) {
        const byte ch = *p;
        if (ch == quote_char) {
            print(env, "\\%c", quote_char);
            continue;
        }
        if (ch == '\\') {
            print(env, "\\\\");
            continue;
        }
        if (ch >= 32 && ch <= 126) {
            // printable ASCII goes out unchanged
            print(env, "%c", ch);
            continue;
        }
        switch (ch) {
            case '\n':
                print(env, "\\n");
                break;
            case '\r':
                print(env, "\\r");
                break;
            case '\t':
                print(env, "\\t");
                break;
            default:
                print(env, "\\x%02x", ch);
                break;
        }
    }
    print(env, "%c", quote_char);
}

97
// Print handler for str and bytes objects.
// A str printed with PRINT_STR is written raw; everything else is written as
// a quoted, escaped literal, with a leading "b" when the object is bytes.
STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
    GET_STR_DATA_LEN(self_in, str_data, str_len);

    if (!MP_OBJ_IS_TYPE(self_in, &mp_type_bytes)) {
        if (kind == PRINT_STR) {
            // plain str(): no quotes, no escaping
            print(env, "%.*s", str_len, str_data);
            return;
        }
    } else {
        print(env, "b");
    }

    mp_str_print_quoted(print, env, str_data, str_len);
}

110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// Constructor for str objects.
//   str()                       -> the empty qstr
//   str(obj)                    -> obj rendered with PRINT_STR
//   str(bytes, enc[, errors])   -> str object that reuses the bytes object's
//                                  data pointer and hash (2nd/3rd args are
//                                  not inspected)
// More than 3 arguments raises TypeError; a non-bytes first argument in the
// 2/3-argument form raises TypeError.
STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
    if (n_args == 0) {
        return MP_OBJ_NEW_QSTR(MP_QSTR_);
    }

    if (n_args == 1) {
        // render the argument into a temporary vstr, then copy it into a str
        vstr_t *buf = vstr_new();
        mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, buf, args[0], PRINT_STR);
        mp_obj_t result = mp_obj_new_str((byte*)buf->buf, buf->len, false);
        vstr_free(buf);
        return result;
    }

    if (n_args > 3) {
        nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "str takes at most 3 arguments"));
    }

    // n_args is 2 or 3
    // TODO: validate 2nd/3rd args
    if (!MP_OBJ_IS_TYPE(args[0], &mp_type_bytes)) {
        nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes expected"));
    }
    GET_STR_DATA_LEN(args[0], str_data, str_len);
    GET_STR_HASH(args[0], str_hash);
    // NULL data: the data pointer and hash are filled in by hand below
    mp_obj_str_t *o = str_new(&mp_type_str, NULL, str_len);
    o->data = str_data;
    o->hash = str_hash;
    return o;
}

144
145
146
147
148
149
150
151
152
153
154
// Constructor for bytes objects.
//   bytes()                     -> the shared empty bytes object
//   bytes(str, enc[, errors])   -> bytes object that reuses the str's data
//                                  pointer and hash (2nd/3rd args are not
//                                  inspected)
//   bytes(small_int)            -> that many zero bytes; a negative count
//                                  raises ValueError
//   bytes(iterable)             -> one byte per item yielded by the iterable
// Any other argument combination raises TypeError.
STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
    if (n_args == 0) {
        return mp_const_empty_bytes;
    }

    if (MP_OBJ_IS_STR(args[0])) {
        if (n_args < 2 || n_args > 3) {
            goto wrong_args;
        }
        GET_STR_DATA_LEN(args[0], str_data, str_len);
        GET_STR_HASH(args[0], str_hash);
        // NULL data: the data pointer and hash are filled in by hand below
        mp_obj_str_t *o = str_new(&mp_type_bytes, NULL, str_len);
        o->data = str_data;
        o->hash = str_hash;
        return o;
    }

    if (n_args > 1) {
        goto wrong_args;
    }

    if (MP_OBJ_IS_SMALL_INT(args[0])) {
        // Read the count as a signed value first: storing a negative small int
        // straight into an unsigned length would request a huge allocation.
        machine_int_t count = MP_OBJ_SMALL_INT_VALUE(args[0]);
        if (count < 0) {
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "negative count"));
        }
        uint len = count;
        byte *data;

        mp_obj_t o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
        memset(data, 0, len);
        return mp_obj_str_builder_end(o);
    }

    int len;
    byte *data;
    vstr_t *vstr = NULL;
    mp_obj_t o = NULL;
    // Try to create array of exact len if initializer len is known
    mp_obj_t len_in = mp_obj_len_maybe(args[0]);
    if (len_in == MP_OBJ_NULL) {
        // length unknown: collect into a growable vstr first
        len = -1;
        vstr = vstr_new();
    } else {
        len = MP_OBJ_SMALL_INT_VALUE(len_in);
        o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
    }

    // NOTE(review): items are read with MP_OBJ_SMALL_INT_VALUE without checking
    // that they are small ints in 0..255, and in the known-length path the
    // number of items yielded is not checked against len -- confirm callers
    // cannot violate either assumption.
    mp_obj_t iterable = mp_getiter(args[0]);
    mp_obj_t item;
    while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
        if (len == -1) {
            vstr_add_char(vstr, MP_OBJ_SMALL_INT_VALUE(item));
        } else {
            *data++ = MP_OBJ_SMALL_INT_VALUE(item);
        }
    }

    if (len == -1) {
        vstr_shrink(vstr);
        // TODO: Optimize, borrow buffer from vstr
        len = vstr_len(vstr);
        o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
        memcpy(data, vstr_str(vstr), len);
        vstr_free(vstr);
    }

    return mp_obj_str_builder_end(o);

wrong_args:
    nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "wrong number of arguments"));
}

213
214
// like strstr but with specified length and allows \0 bytes
// TODO replace with something more efficient/standard
215
STATIC const byte *find_subbytes(const byte *haystack, machine_uint_t hlen, const byte *needle, machine_uint_t nlen, machine_int_t direction) {
216
    if (hlen >= nlen) {
217
218
219
220
221
222
223
224
225
226
227
228
        machine_uint_t str_index, str_index_end;
        if (direction > 0) {
            str_index = 0;
            str_index_end = hlen - nlen;
        } else {
            str_index = hlen - nlen;
            str_index_end = 0;
        }
        for (;;) {
            if (memcmp(&haystack[str_index], needle, nlen) == 0) {
                //found
                return haystack + str_index;
229
            }
230
231
232
            if (str_index == str_index_end) {
                //not found
                break;
233
            }
234
            str_index += direction;
235
236
237
238
239
        }
    }
    return NULL;
}

240
// Binary operator handler for str/bytes objects; lhs_in is the str/bytes.
//   +, +=      concatenation when rhs is a str; result has the type of lhs
//   in         substring test (rhs searched for inside lhs)
//   *          repetition by a small int; a negative count gives an empty result
//   %          printf-style formatting via str_modulo_format
//   < <= > >=  byte-wise comparison when rhs is a str
// Returns MP_OBJ_NOT_SUPPORTED for any operator/operand combination that is
// not handled here.
STATIC mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
    GET_STR_DATA_LEN(lhs_in, lhs_data, lhs_len);
    switch (op) {
        case MP_BINARY_OP_ADD:
        case MP_BINARY_OP_INPLACE_ADD:
            if (MP_OBJ_IS_STR(rhs_in)) {
                // add 2 strings
                GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
                int alloc_len = lhs_len + rhs_len;

                byte *data;
                mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), alloc_len, &data);
                memcpy(data, lhs_data, lhs_len);
                memcpy(data + lhs_len, rhs_data, rhs_len);
                return mp_obj_str_builder_end(s);
            }
            break;

        case MP_BINARY_OP_IN:
            /* NOTE `a in b` is `b.__contains__(a)` */
            if (MP_OBJ_IS_STR(rhs_in)) {
                GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
                return MP_BOOL(find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len, 1) != NULL);
            }
            break;

        case MP_BINARY_OP_MULTIPLY:
        {
            if (!MP_OBJ_IS_SMALL_INT(rhs_in)) {
                // same "unsupported" sentinel as every other path in this function
                return MP_OBJ_NOT_SUPPORTED;
            }
            int n = MP_OBJ_SMALL_INT_VALUE(rhs_in);
            if (n < 0) {
                // a negative repeat count means "empty"; without the clamp the
                // unsigned product lhs_len * n wraps to a huge length
                n = 0;
            }
            byte *data;
            mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), lhs_len * n, &data);
            mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, data);
            return mp_obj_str_builder_end(s);
        }

        case MP_BINARY_OP_MODULO: {
            mp_obj_t *args;
            uint n_args;
            if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_tuple)) {
                // TODO: Support tuple subclasses?
                mp_obj_tuple_get(rhs_in, &n_args, &args);
            } else {
                // a single non-tuple operand is treated as a 1-element argument list
                args = &rhs_in;
                n_args = 1;
            }
            return str_modulo_format(lhs_in, n_args, args);
        }

        // These 2 are never passed here, dealt with as a special case in mp_binary_op().
        //case MP_BINARY_OP_EQUAL:
        //case MP_BINARY_OP_NOT_EQUAL:
        case MP_BINARY_OP_LESS:
        case MP_BINARY_OP_LESS_EQUAL:
        case MP_BINARY_OP_MORE:
        case MP_BINARY_OP_MORE_EQUAL:
            if (MP_OBJ_IS_STR(rhs_in)) {
                GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
                return MP_BOOL(mp_seq_cmp_bytes(op, lhs_data, lhs_len, rhs_data, rhs_len));
            }
            break;

        default:
            break;
    }

    return MP_OBJ_NOT_SUPPORTED;
}

317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
// Subscript handler for str/bytes.  Only loads are handled (value ==
// MP_OBJ_SENTINEL); any other value returns MP_OBJ_NOT_SUPPORTED.
// A slice index (when MICROPY_ENABLE_SLICE is set) yields a new string built
// from the selected range; an integer index yields a small int for bytes and
// a one-character string otherwise.
STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
    GET_STR_DATA_LEN(self_in, self_data, self_len);

    if (value != MP_OBJ_SENTINEL) {
        // store/delete are not supported
        return MP_OBJ_NOT_SUPPORTED;
    }

#if MICROPY_ENABLE_SLICE
    if (MP_OBJ_IS_TYPE(index, &mp_type_slice)) {
        machine_uint_t start, stop;
        if (!m_seq_get_fast_slice_indexes(self_len, index, &start, &stop)) {
            assert(0);
        }
        // NOTE(review): this always builds a str via mp_obj_new_str, even when
        // self_in is bytes -- confirm whether that is intended.
        return mp_obj_new_str(self_data + start, stop - start, false);
    }
#endif

    mp_obj_type_t *type = mp_obj_get_type(self_in);
    uint index_val = mp_get_index(type, self_len, index, false);
    if (type != &mp_type_bytes) {
        return mp_obj_new_str(self_data + index_val, 1, true);
    }
    return MP_OBJ_NEW_SMALL_INT((mp_small_int_t)self_data[index_val]);
}

342
// self.join(arg): concatenate the strings in arg, inserting self between
// consecutive items.  arg may be a tuple or a list; anything else is first
// converted to a list.  Raises TypeError if any item is not a str.  The
// result has the same type as self.
STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
    assert(MP_OBJ_IS_STR(self_in));

    // separator bytes
    GET_STR_DATA_LEN(self_in, sep_str, sep_len);

    // obtain a flat array of the items to join
    uint seq_len;
    mp_obj_t *seq_items;
    if (MP_OBJ_IS_TYPE(arg, &mp_type_tuple)) {
        mp_obj_tuple_get(arg, &seq_len, &seq_items);
    } else {
        if (!MP_OBJ_IS_TYPE(arg, &mp_type_list)) {
            // arg is not a list, try to convert it to one
            // TODO: Try to optimize?
            arg = mp_type_list.make_new((mp_obj_t)&mp_type_list, 1, 0, &arg);
        }
        mp_obj_list_get(arg, &seq_len, &seq_items);
    }

    // first pass: type-check every item and add up the output size
    int required_len = 0;
    for (int idx = 0; idx < seq_len; idx++) {
        mp_obj_t item = seq_items[idx];
        if (!MP_OBJ_IS_STR(item)) {
            nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "join expected a list of str's"));
        }
        if (idx != 0) {
            required_len += sep_len;
        }
        GET_STR_LEN(item, item_len);
        required_len += item_len;
    }

    // second pass: copy separators and items into the new string
    byte *data;
    mp_obj_t joined_str = mp_obj_str_builder_start(mp_obj_get_type(self_in), required_len, &data);
    byte *out = data;
    for (int idx = 0; idx < seq_len; idx++) {
        if (idx != 0) {
            memcpy(out, sep_str, sep_len);
            out += sep_len;
        }
        GET_STR_DATA_LEN(seq_items[idx], item_data, item_len);
        memcpy(out, item_data, item_len);
        out += item_len;
    }

    return mp_obj_str_builder_end(joined_str);
}

Paul Sokolovsky's avatar
Paul Sokolovsky committed
392
393
// True when c is one of the ASCII whitespace characters: space, tab, newline,
// carriage return, vertical tab or form feed.  This is the same set as the
// whitespace table used for stripping, so splitting on whitespace also breaks
// at line endings.  The argument is evaluated several times, so it must not
// have side effects.
#define is_ws(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r' || (c) == '\v' || (c) == '\f')

394
// str.split([sep[, maxsplit]]): returns a new list of substrings of args[0].
// With no separator (or None) the string is split on runs of is_ws()
// characters and leading whitespace is skipped.  With a separator, the string
// is split at each occurrence of it; an empty separator raises ValueError.
// A non-negative maxsplit limits the number of splits performed; the
// unsplit remainder becomes the last item.
STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
    mp_obj_t sep = mp_const_none;
    machine_int_t splits = -1;  // negative means "no limit"
    if (n_args >= 2) {
        sep = args[1];
    }
    if (n_args >= 3) {
        splits = mp_obj_get_int(args[2]);
    }

    mp_obj_t res = mp_obj_new_list(0, NULL);
    GET_STR_DATA_LEN(args[0], s, len);
    const byte *top = s + len;

    if (sep == mp_const_none) {
        // sep not given, so separate on whitespace

        // Initial whitespace is not counted as split, so we pre-do it
        for (; s < top && is_ws(*s); s++) {
        }

        while (s < top && splits != 0) {
            const byte *word = s;
            for (; s < top && !is_ws(*s); s++) {
            }
            mp_obj_list_append(res, mp_obj_new_str(word, s - word, false));
            if (s >= top) {
                break;
            }
            // swallow the whitespace run that ended this word
            for (; s < top && is_ws(*s); s++) {
            }
            if (splits > 0) {
                splits--;
            }
        }

        // split limit reached with text left over: it goes in as one item
        if (s < top) {
            mp_obj_list_append(res, mp_obj_new_str(s, top - s, false));
        }

        return res;
    }

    // sep given
    uint sep_len;
    const char *sep_str = mp_obj_str_get_data(sep, &sep_len);

    if (sep_len == 0) {
        nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
    }

    for (;;) {
        const byte *field = s;

        // advance to the next separator while splits remain and one can still fit
        while (splits != 0 && s + sep_len <= top && memcmp(s, sep_str, sep_len) != 0) {
            s++;
        }
        if (splits == 0 || s + sep_len > top) {
            // no further separator will be honoured: take everything that is left
            s = top;
        }

        mp_obj_list_append(res, mp_obj_new_str(field, s - field, false));
        if (s >= top) {
            break;
        }
        s += sep_len;
        if (splits > 0) {
            splits--;
        }
    }

    return res;
}

465
// Shared implementation of find/rfind/index/rindex.
//   args[0]  string to search in
//   args[1]  substring to search for
//   args[2]  optional start index (None means 0)
//   args[3]  optional end index (None means the string length)
//   direction  > 0 searches forwards, otherwise backwards
//   is_index   when true a miss raises ValueError, otherwise -1 is returned
// On success returns the offset of the match from the start of args[0] as a
// small int.  An empty search window (end before start) never matches.
STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t direction, bool is_index) {
    assert(2 <= n_args && n_args <= 4);
    assert(MP_OBJ_IS_STR(args[0]));
    assert(MP_OBJ_IS_STR(args[1]));

    GET_STR_DATA_LEN(args[0], haystack, haystack_len);
    GET_STR_DATA_LEN(args[1], needle, needle_len);

    machine_uint_t start = 0;
    machine_uint_t end = haystack_len;
    if (n_args >= 3 && args[2] != mp_const_none) {
        start = mp_get_index(&mp_type_str, haystack_len, args[2], true);
    }
    if (n_args >= 4 && args[3] != mp_const_none) {
        end = mp_get_index(&mp_type_str, haystack_len, args[3], true);
    }

    // Only search when the window is well formed: with end < start the
    // unsigned difference end - start would wrap to a huge length and the
    // search would run past the end of the string.
    const byte *p = NULL;
    if (start <= end) {
        p = find_subbytes(haystack + start, end - start, needle, needle_len, direction);
    }

    if (p == NULL) {
        // not found
        if (is_index) {
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "substring not found"));
        } else {
            return MP_OBJ_NEW_SMALL_INT(-1);
        }
    } else {
        // found
        return MP_OBJ_NEW_SMALL_INT(p - haystack);
    }
}

496
// str.find: forward search; a miss is reported as -1 rather than an exception.
STATIC mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
    const machine_int_t forward = 1;
    const bool raise_on_miss = false;
    return str_finder(n_args, args, forward, raise_on_miss);
}

// str.rfind: backward search; a miss is reported as -1 rather than an exception.
STATIC mp_obj_t str_rfind(uint n_args, const mp_obj_t *args) {
    const machine_int_t backward = -1;
    const bool raise_on_miss = false;
    return str_finder(n_args, args, backward, raise_on_miss);
}

// str.index: forward search; a miss raises ValueError.
STATIC mp_obj_t str_index(uint n_args, const mp_obj_t *args) {
    const machine_int_t forward = 1;
    const bool raise_on_miss = true;
    return str_finder(n_args, args, forward, raise_on_miss);
}

// str.rindex: backward search; a miss raises ValueError.
STATIC mp_obj_t str_rindex(uint n_args, const mp_obj_t *args) {
    const machine_int_t backward = -1;
    const bool raise_on_miss = true;
    return str_finder(n_args, args, backward, raise_on_miss);
}

512
// TODO: (Much) more variety in args
513
STATIC mp_obj_t str_startswith(mp_obj_t self_in, mp_obj_t arg) {
514
515
516
517
518
519
520
521
    GET_STR_DATA_LEN(self_in, str, str_len);
    GET_STR_DATA_LEN(arg, prefix, prefix_len);
    if (prefix_len > str_len) {
        return mp_const_false;
    }
    return MP_BOOL(memcmp(str, prefix, prefix_len) == 0);
}

522
523
524
enum { LSTRIP, RSTRIP, STRIP };

// Shared implementation of strip/lstrip/rstrip.
//   type     one of LSTRIP, RSTRIP, STRIP: which end(s) to trim
//   args[0]  the string to trim
//   args[1]  optional set of characters to remove; when absent, the ASCII
//            whitespace characters " \t\n\r\v\f" are removed
// Returns a new string holding the kept range, or the empty qstr when every
// character was removed (including when the input is empty).
STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) {
    assert(1 <= n_args && n_args <= 2);
    assert(MP_OBJ_IS_STR(args[0]));

    const byte *chars_to_del;
    uint chars_to_del_len;
    static const byte whitespace[] = " \t\n\r\v\f";

    if (n_args == 1) {
        chars_to_del = whitespace;
        // -1 so the literal's terminating NUL is not treated as a character
        // to strip
        chars_to_del_len = sizeof(whitespace) - 1;
    } else {
        assert(MP_OBJ_IS_STR(args[1]));
        GET_STR_DATA_LEN(args[1], s, l);
        chars_to_del = s;
        chars_to_del_len = l;
    }

    GET_STR_DATA_LEN(args[0], orig_str, orig_str_len);

    machine_uint_t first_good_char_pos = 0;
    bool first_good_char_pos_set = false;
    machine_uint_t last_good_char_pos = 0;

    // RSTRIP walks backwards from the last character; the others walk forwards.
    // (For an empty string the wrapped start index is never used because the
    // loop below does not run.)
    machine_uint_t i = 0;
    machine_int_t delta = 1;
    if (type == RSTRIP) {
        i = orig_str_len - 1;
        delta = -1;
    }
    for (machine_uint_t len = orig_str_len; len > 0; len--) {
        if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) {
            // orig_str[i] is a character to keep
            if (!first_good_char_pos_set) {
                // Record that something is kept *before* any early exit, so a
                // kept range that starts and ends at index 0 is not mistaken
                // for "everything was stripped".
                first_good_char_pos_set = true;
                first_good_char_pos = i;
                if (type == LSTRIP) {
                    // keep everything from here to the end
                    last_good_char_pos = orig_str_len - 1;
                    break;
                } else if (type == RSTRIP) {
                    // keep everything from the start up to here
                    first_good_char_pos = 0;
                    last_good_char_pos = i;
                    break;
                }
            }
            last_good_char_pos = i;
        }
        i += delta;
    }

    if (!first_good_char_pos_set) {
        // no character survived, return ''
        return MP_OBJ_NEW_QSTR(MP_QSTR_);
    }

    assert(last_good_char_pos >= first_good_char_pos);
    // +1 to accommodate the last character
    machine_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
    return mp_obj_new_str(orig_str + first_good_char_pos, stripped_len, false);
}

583
584
585
586
587
588
589
590
591
592
593
594
// str.strip: trim both ends.
STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
    const int mode = STRIP;
    return str_uni_strip(mode, n_args, args);
}

// str.lstrip: trim the leading end only.
STATIC mp_obj_t str_lstrip(uint n_args, const mp_obj_t *args) {
    const int mode = LSTRIP;
    return str_uni_strip(mode, n_args, args);
}

// str.rstrip: trim the trailing end only.
STATIC mp_obj_t str_rstrip(uint n_args, const mp_obj_t *args) {
    const int mode = RSTRIP;
    return str_uni_strip(mode, n_args, args);
}

Dave Hylands's avatar
Dave Hylands committed
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
// Parse a run of decimal digits at the start of str.
// Only unsigned numbers are recognised.  *num is written only when at least
// one digit was consumed; otherwise it is left untouched.
// Returns the number of characters consumed (0 when str does not start with
// a digit).
static int str_to_int(const char *str, int *num) {
    const char *cursor = str;
    if (unichar_isdigit(*cursor)) {
        int value = 0;
        do {
            value = 10 * value + (*cursor - '0');
            cursor++;
        } while (unichar_isdigit(*cursor));
        *num = value;
    }
    return cursor - str;
}

// True when ch is one of the format-spec alignment characters: < > = ^
static bool isalignment(char ch) {
    switch (ch) {
        case '<':
        case '>':
        case '=':
        case '^':
            return true;
        default:
            return false;
    }
}

// True when ch is one of the format-spec presentation type characters:
// b c d e E f F g G n o s x X %
static bool istype(char ch) {
    // the scan stops at the literal's terminator, so '\0' never matches
    for (const char *code = "bcdeEfFgGnosxX%"; *code != '\0'; code++) {
        if (*code == ch) {
            return true;
        }
    }
    return false;
}

// True when arg is a bool object or satisfies MP_OBJ_IS_INT.
static bool arg_looks_integer(mp_obj_t arg) {
    if (MP_OBJ_IS_TYPE(arg, &mp_type_bool)) {
        return true;
    }
    return MP_OBJ_IS_INT(arg);
}

// True when arg passes arg_looks_integer or, when float support is compiled
// in, is a float object.
static bool arg_looks_numeric(mp_obj_t arg) {
    if (arg_looks_integer(arg)) {
        return true;
    }
#if MICROPY_ENABLE_FLOAT
    if (MP_OBJ_IS_TYPE(arg, &mp_type_float)) {
        return true;
    }
#endif
    return false;
}

630
// Return arg in integer form.  When float support is compiled in and arg is
// a float object, its value is converted to a small int; every other object
// is returned unchanged.
static mp_obj_t arg_as_int(mp_obj_t arg) {
#if MICROPY_ENABLE_FLOAT
    if (!MP_OBJ_IS_TYPE(arg, &mp_type_float)) {
        return arg;
    }

    // TODO: Needs a way to construct an mpz integer from a float

    mp_small_int_t truncated = mp_obj_get_float(arg);
    return MP_OBJ_NEW_SMALL_INT(truncated);
#else
    return arg;
#endif
}

643
mp_obj_t mp_obj_str_format(uint n_args, const mp_obj_t *args) {
644
    assert(MP_OBJ_IS_STR(args[0]));
645

646
    GET_STR_DATA_LEN(args[0], str, len);
Dave Hylands's avatar
Dave Hylands committed
647
    int arg_i = 0;
648
    vstr_t *vstr = vstr_new();
Dave Hylands's avatar
Dave Hylands committed
649
650
651
652
    pfenv_t pfenv_vstr;
    pfenv_vstr.data = vstr;
    pfenv_vstr.print_strn = pfenv_vstr_add_strn;

653
    for (const byte *top = str + len; str < top; str++) {
Dave Hylands's avatar
Dave Hylands committed
654
655
656
657
658
659
        if (*str == '}') {
            str++;
            if (str < top && *str == '}') {
                vstr_add_char(vstr, '}');
                continue;
            }
660
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Single '}' encountered in format string"));
Dave Hylands's avatar
Dave Hylands committed
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
        }
        if (*str != '{') {
            vstr_add_char(vstr, *str);
            continue;
        }

        str++;
        if (str < top && *str == '{') {
            vstr_add_char(vstr, '{');
            continue;
        }

        // replacement_field ::=  "{" [field_name] ["!" conversion] [":" format_spec] "}"

        vstr_t *field_name = NULL;
        char conversion = '\0';
        vstr_t *format_spec = NULL;

        if (str < top && *str != '}' && *str != '!' && *str != ':') {
            field_name = vstr_new();
            while (str < top && *str != '}' && *str != '!' && *str != ':') {
                vstr_add_char(field_name, *str++);
            }
            vstr_add_char(field_name, '\0');
        }

        // conversion ::=  "r" | "s"

        if (str < top && *str == '!') {
690
            str++;
Dave Hylands's avatar
Dave Hylands committed
691
692
            if (str < top && (*str == 'r' || *str == 's')) {
                conversion = *str++;
693
            } else {
694
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "end of format while looking for conversion specifier"));
Dave Hylands's avatar
Dave Hylands committed
695
696
697
698
699
700
701
702
703
704
705
706
707
708
            }
        }

        if (str < top && *str == ':') {
            str++;
            // {:} is the same as {}, which is the same as {!s}
            // This makes a difference when passing in a True or False
            // '{}'.format(True) returns 'True'
            // '{:d}'.format(True) returns '1'
            // So we treat {:} as {} and this later gets treated to be {!s}
            if (*str != '}') {
                format_spec = vstr_new(); 
                while (str < top && *str != '}') {
                    vstr_add_char(format_spec, *str++);
709
                }
Dave Hylands's avatar
Dave Hylands committed
710
711
712
713
                vstr_add_char(format_spec, '\0');
            }
        }
        if (str >= top) {
714
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "unmatched '{' in format"));
Dave Hylands's avatar
Dave Hylands committed
715
716
        }
        if (*str != '}') {
717
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "expected ':' after format specifier"));
Dave Hylands's avatar
Dave Hylands committed
718
719
720
721
722
723
        }

        mp_obj_t arg = mp_const_none;

        if (field_name) {
            if (arg_i > 0) {
724
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "cannot switch from automatic field numbering to manual field specification"));
Dave Hylands's avatar
Dave Hylands committed
725
            }
726
            int index = 0;
Dave Hylands's avatar
Dave Hylands committed
727
            if (str_to_int(vstr_str(field_name), &index) != vstr_len(field_name) - 1) {
728
                nlr_raise(mp_obj_new_exception_msg(&mp_type_KeyError, "attributes not supported yet"));
729
            }
Dave Hylands's avatar
Dave Hylands committed
730
            if (index >= n_args - 1) {
731
                nlr_raise(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
Dave Hylands's avatar
Dave Hylands committed
732
733
734
735
736
            }
            arg = args[index + 1];
            arg_i = -1;
            vstr_free(field_name);
            field_name = NULL;
737
        } else {
Dave Hylands's avatar
Dave Hylands committed
738
            if (arg_i < 0) {
739
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "cannot switch from manual field specification to automatic field numbering"));
Dave Hylands's avatar
Dave Hylands committed
740
741
            }
            if (arg_i >= n_args - 1) {
742
                nlr_raise(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
Dave Hylands's avatar
Dave Hylands committed
743
744
745
746
747
748
749
750
751
752
753
754
755
756
            }
            arg = args[arg_i + 1];
            arg_i++;
        }
        if (!format_spec && !conversion) {
            conversion = 's';
        }
        if (conversion) {
            mp_print_kind_t print_kind;
            if (conversion == 's') {
                print_kind = PRINT_STR;
            } else if (conversion == 'r') {
                print_kind = PRINT_REPR;
            } else {
757
                nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "Unknown conversion specifier %c", conversion));
Dave Hylands's avatar
Dave Hylands committed
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
            }
            vstr_t *arg_vstr = vstr_new();
            mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, arg_vstr, arg, print_kind);
            arg = mp_obj_new_str((const byte *)vstr_str(arg_vstr), vstr_len(arg_vstr), false);
            vstr_free(arg_vstr);
        }

        char sign = '\0';
        char fill = '\0';
        char align = '\0';
        int width = -1;
        int precision = -1;
        char type = '\0';
        int flags = 0;

        if (format_spec) {
            // The format specifier (from http://docs.python.org/2/library/string.html#formatspec)
            //
            // [[fill]align][sign][#][0][width][,][.precision][type]
            // fill        ::=  <any character>
            // align       ::=  "<" | ">" | "=" | "^"
            // sign        ::=  "+" | "-" | " "
            // width       ::=  integer
            // precision   ::=  integer
            // type        ::=  "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%"

            const char *s = vstr_str(format_spec);
            if (isalignment(*s)) {
                align = *s++;
            } else if (*s && isalignment(s[1])) {
                fill = *s++;
                align = *s++;
            }
            if (*s == '+' || *s == '-' || *s == ' ') {
                if (*s == '+') {
                    flags |= PF_FLAG_SHOW_SIGN;
                } else if (*s == ' ') {
                    flags |= PF_FLAG_SPACE_SIGN;
                }
                sign = *s++;
            }
            if (*s == '#') {
                flags |= PF_FLAG_SHOW_PREFIX;
                s++;
            }
            if (*s == '0') {
                if (!align) {
                    align = '=';
                }
                if (!fill) {
                    fill = '0';
                }
            }
            s += str_to_int(s, &width);
            if (*s == ',') {
                flags |= PF_FLAG_SHOW_COMMA;
                s++;
            }
            if (*s == '.') {
                s++;
                s += str_to_int(s, &precision);
            }
            if (istype(*s)) {
                type = *s++;
            }
            if (*s) {
824
                nlr_raise(mp_obj_new_exception_msg(&mp_type_KeyError, "Invalid conversion specification"));
Dave Hylands's avatar
Dave Hylands committed
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
            }
            vstr_free(format_spec);
            format_spec = NULL;
        }
        if (!align) {
            if (arg_looks_numeric(arg)) {
                align = '>';
            } else {
                align = '<';
            }
        }
        if (!fill) {
            fill = ' ';
        }

        if (sign) {
            if (type == 's') {
842
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed in string format specifier"));
Dave Hylands's avatar
Dave Hylands committed
843
844
            }
            if (type == 'c') {
845
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed with integer format specifier 'c'"));
Dave Hylands's avatar
Dave Hylands committed
846
847
848
849
850
851
852
853
854
855
856
857
858
859
            }
        } else {
            sign = '-';
        }

        switch (align) {
            case '<': flags |= PF_FLAG_LEFT_ADJUST;     break;
            case '=': flags |= PF_FLAG_PAD_AFTER_SIGN;  break;
            case '^': flags |= PF_FLAG_CENTER_ADJUST;   break;
        }

        if (arg_looks_integer(arg)) {
            switch (type) {
                case 'b':
860
                    pfenv_print_mp_int(&pfenv_vstr, arg, 1, 2, 'a', flags, fill, width);
Dave Hylands's avatar
Dave Hylands committed
861
862
863
864
865
866
867
868
869
870
871
872
                    continue;

                case 'c':
                {
                    char ch = mp_obj_get_int(arg);
                    pfenv_print_strn(&pfenv_vstr, &ch, 1, flags, fill, width);
                    continue;
                }

                case '\0':  // No explicit format type implies 'd'
                case 'n':   // I don't think we support locales in uPy so use 'd'
                case 'd':
873
                    pfenv_print_mp_int(&pfenv_vstr, arg, 1, 10, 'a', flags, fill, width);
Dave Hylands's avatar
Dave Hylands committed
874
875
876
                    continue;

                case 'o':
877
878
879
880
                    if (flags & PF_FLAG_SHOW_PREFIX) {
                        flags |= PF_FLAG_SHOW_OCTAL_LETTER;
                    }

881
                    pfenv_print_mp_int(&pfenv_vstr, arg, 1, 8, 'a', flags, fill, width);
Dave Hylands's avatar
Dave Hylands committed
882
883
884
                    continue;

                case 'x':
885
                    pfenv_print_mp_int(&pfenv_vstr, arg, 1, 16, 'a', flags, fill, width);
Dave Hylands's avatar
Dave Hylands committed
886
887
888
                    continue;

                case 'X':
889
                    pfenv_print_mp_int(&pfenv_vstr, arg, 1, 16, 'A', flags, fill, width);
Dave Hylands's avatar
Dave Hylands committed
890
891
892
893
894
895
896
897
898
899
900
901
902
903
                    continue;

                case 'e':
                case 'E':
                case 'f':
                case 'F':
                case 'g':
                case 'G':
                case '%':
                    // The floating point formatters all work with anything that
                    // looks like an integer
                    break;

                default:
904
                    nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
Dave Hylands's avatar
Dave Hylands committed
905
906
                        "Unknown format code '%c' for object of type '%s'", type, mp_obj_get_type_str(arg)));
            }
907
        }
908

909
910
        // NOTE: no else here. We need the e, f, g etc formats for integer
        //       arguments (from above if) to take this if.
911
        if (arg_looks_numeric(arg)) {
Dave Hylands's avatar
Dave Hylands committed
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
            if (!type) {

                // Even though the docs say that an unspecified type is the same
                // as 'g', there is one subtle difference, when the exponent
                // is one less than the precision.
                //  
                // '{:10.1}'.format(0.0) ==> '0e+00'
                // '{:10.1g}'.format(0.0) ==> '0'
                //
                // TODO: Figure out how to deal with this.
                //
                // A proper solution would involve adding a special flag
                // or something to format_float, and create a format_double
                // to deal with doubles. In order to fix this when using
                // sprintf, we'd need to use the e format and tweak the
                // returned result to strip trailing zeros like the g format
                // does.
                //
                // {:10.3} and {:10.2e} with 1.23e2 both produce 1.23e+02
                // but with 1.e2 you get 1e+02 and 1.00e+02
                //
                // Stripping the trailing 0's (like g does) would make the
                // e format give us the right format.
                //
                // CPython sources say:
                //   Omitted type specifier.  Behaves in the same way as repr(x)
                //   and str(x) if no precision is given, else like 'g', but with
                //   at least one digit after the decimal point. */

                type = 'g';
            }
            if (type == 'n') {
                type = 'g';
            }

            flags |= PF_FLAG_PAD_NAN_INF; // '{:06e}'.format(float('-inf')) should give '-00inf'
            switch (type) {
949
#if MICROPY_ENABLE_FLOAT
Dave Hylands's avatar
Dave Hylands committed
950
951
952
953
954
955
956
957
958
959
960
961
962
                case 'e':
                case 'E':
                case 'f':
                case 'F':
                case 'g':
                case 'G':
                    pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg), type, flags, fill, width, precision); 
                    break;

                case '%':
                    flags |= PF_FLAG_ADD_PERCENT;
                    pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg) * 100.0F, 'f', flags, fill, width, precision);
                    break;
963
#endif
Dave Hylands's avatar
Dave Hylands committed
964
965

                default:
966
                    nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
Dave Hylands's avatar
Dave Hylands committed
967
968
969
970
                        "Unknown format code '%c' for object of type 'float'",
                        type, mp_obj_get_type_str(arg)));
            }
        } else {
971
972
            // arg doesn't look like a number

Dave Hylands's avatar
Dave Hylands committed
973
            if (align == '=') {
974
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "'=' alignment not allowed in string format specifier"));
Dave Hylands's avatar
Dave Hylands committed
975
            }
976

Dave Hylands's avatar
Dave Hylands committed
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
            switch (type) {
                case '\0':
                    mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, arg, PRINT_STR);
                    break;

                case 's':
                {
                    uint len;
                    const char *s = mp_obj_str_get_data(arg, &len);
                    if (precision < 0) {
                        precision = len;
                    }
                    if (len > precision) {
                        len = precision;
                    }
                    pfenv_print_strn(&pfenv_vstr, s, len, flags, fill, width);
                    break;
                }

                default:
997
                    nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
Dave Hylands's avatar
Dave Hylands committed
998
999
1000
                        "Unknown format code '%c' for object of type 'str'",
                        type, mp_obj_get_type_str(arg)));
            }
1001
1002
1003
        }
    }

1004
1005
1006
    mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
    vstr_free(vstr);
    return s;
1007
1008
}

1009
1010
1011
1012
// Implements the '%' string-formatting operator.
//
// Expands the printf-style conversion specifications found in `pattern`
// using the `n_args` objects in `args`, and returns the result as a new
// str object.
//
// A specification has the form  %[flags][width][.precision]type  where:
//   flags      any of '-', '+', ' ', '#', '0'
//   width      decimal digits, or '*' to take the value from the next argument
//   precision  decimal digits, or '*' to take the value from the next argument
//   type       one of c d i u o x X r s (and e E f F g G when
//              MICROPY_ENABLE_FLOAT is set)
// "%%" emits a literal '%'.
//
// Raises (via nlr_raise):
//   TypeError   not enough arguments, left-over arguments, or a bad argument
//               for %c
//   ValueError  a specification that is cut short by the end of the pattern,
//               or an unsupported conversion character
STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t *args) {
    assert(MP_OBJ_IS_STR(pattern));

    GET_STR_DATA_LEN(pattern, str, len);
    const byte *start_str = str;
    // index of the next unconsumed argument; unsigned to match n_args
    uint arg_i = 0;
    vstr_t *vstr = vstr_new();
    pfenv_t pfenv_vstr;
    pfenv_vstr.data = vstr;
    pfenv_vstr.print_strn = pfenv_vstr_add_strn;

    for (const byte *top = str + len; str < top; str++) {
        if (*str != '%') {
            // ordinary character: copy through unchanged
            vstr_add_char(vstr, *str);
            continue;
        }
        if (++str >= top) {
            // A lone '%' at the very end of the pattern is a truncated
            // specification; report it like every other truncated one
            // instead of silently dropping it.
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "incomplete format"));
        }
        if (*str == '%') {
            vstr_add_char(vstr, '%');
            continue;
        }
        if (arg_i >= n_args) {
            nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "not enough arguments for format string"));
        }

        // parse flags
        int flags = 0;
        char fill = ' ';
        bool alt = false;
        while (str < top) {
            if (*str == '-')      flags |= PF_FLAG_LEFT_ADJUST;
            else if (*str == '+') flags |= PF_FLAG_SHOW_SIGN;
            else if (*str == ' ') flags |= PF_FLAG_SPACE_SIGN;
            else if (*str == '#') alt = true;
            else if (*str == '0') {
                flags |= PF_FLAG_PAD_AFTER_SIGN;
                fill = '0';
            } else break;
            str++;
        }

        // parse width, if it exists
        int width = 0;
        if (str < top) {
            if (*str == '*') {
                // arg_i < n_args is guaranteed by the check above
                width = mp_obj_get_int(args[arg_i++]);
                str++;
            } else {
                for (; str < top && '0' <= *str && *str <= '9'; str++) {
                    width = width * 10 + *str - '0';
                }
            }
        }

        // parse precision, if it exists (-1 means "not given")
        int prec = -1;
        if (str < top && *str == '.') {
            if (++str < top) {
                if (*str == '*') {
                    // a '*' width may already have used up the last argument
                    if (arg_i >= n_args) {
                        nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "not enough arguments for format string"));
                    }
                    prec = mp_obj_get_int(args[arg_i++]);
                    str++;
                } else {
                    prec = 0;
                    for (; str < top && '0' <= *str && *str <= '9'; str++) {
                        prec = prec * 10 + *str - '0';
                    }
                }
            }
        }

        if (str >= top) {
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "incomplete format"));
        }

        // '*' width/precision may have consumed every remaining argument, so
        // re-check before fetching the value to convert; without this check
        // args[arg_i] would be read past the end of the array.
        if (arg_i >= n_args) {
            nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "not enough arguments for format string"));
        }
        mp_obj_t arg = args[arg_i];

        switch (*str) {
            case 'c':
                if (MP_OBJ_IS_STR(arg)) {
                    uint arg_len;
                    const char *s = mp_obj_str_get_data(arg, &arg_len);
                    if (arg_len != 1) {
                        nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "%c requires int or char"));
                        break;
                    }
                    pfenv_print_strn(&pfenv_vstr, s, 1, flags, ' ', width);
                    break;
                }
                if (arg_looks_integer(arg)) {
                    char ch = mp_obj_get_int(arg);
                    pfenv_print_strn(&pfenv_vstr, &ch, 1, flags, ' ', width);
                    break;
                }
#if MICROPY_ENABLE_FLOAT
                // This is what CPython reports, so we report the same.
                if (MP_OBJ_IS_TYPE(arg, &mp_type_float)) {
                    nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "integer argument expected, got float"));
                }
#endif
                nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "an integer is required"));
                break;

            case 'd':
            case 'i':
            case 'u':
                pfenv_print_mp_int(&pfenv_vstr, arg_as_int(arg), 1, 10, 'a', flags, fill, width);
                break;

#if MICROPY_ENABLE_FLOAT
            case 'e':
            case 'E':
            case 'f':
            case 'F':
            case 'g':
            case 'G':
                pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg), *str, flags, fill, width, prec);
                break;
#endif

            case 'o':
                if (alt) {
                    flags |= (PF_FLAG_SHOW_PREFIX | PF_FLAG_SHOW_OCTAL_LETTER);
                }
                pfenv_print_mp_int(&pfenv_vstr, arg_as_int(arg), 1, 8, 'a', flags, fill, width);
                break;

            case 'r':
            case 's':
            {
                // render the argument into a scratch buffer, then emit at
                // most `prec` characters of it
                vstr_t *arg_vstr = vstr_new();
                mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf,
                                    arg_vstr, arg, *str == 'r' ? PRINT_REPR : PRINT_STR);
                uint out_len = vstr_len(arg_vstr);
                if (prec < 0) {
                    prec = out_len;
                }
                if (out_len > (uint)prec) {
                    out_len = prec;
                }
                pfenv_print_strn(&pfenv_vstr, vstr_str(arg_vstr), out_len, flags, ' ', width);
                vstr_free(arg_vstr);
                break;
            }

            case 'x':
                if (alt) {
                    flags |= PF_FLAG_SHOW_PREFIX;
                }
                pfenv_print_mp_int(&pfenv_vstr, arg_as_int(arg), 1, 16, 'a', flags, fill, width);
                break;

            case 'X':
                if (alt) {
                    flags |= PF_FLAG_SHOW_PREFIX;
                }
                pfenv_print_mp_int(&pfenv_vstr, arg_as_int(arg), 1, 16, 'A', flags, fill, width);
                break;

            default:
                // the pointer difference is cast so that it matches the %d
                // conversion in the message
                nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
                    "unsupported format character '%c' (0x%x) at index %d",
                    *str, *str, (int)(str - start_str)));
        }
        arg_i++;
    }

    if (arg_i != n_args) {
        nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "not all arguments converted during string formatting"));
    }

    mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
    vstr_free(vstr);
    return s;
}

1180
STATIC mp_obj_t str_replace(uint n_args, const mp_obj_t *args) {
1181
1182
    assert(MP_OBJ_IS_STR(args[0]));

1183
    machine_int_t max_rep = -1;
1184
    if (n_args == 4) {
1185
        max_rep = mp_obj_get_int(args[3]);
1186
1187
1188
        if (max_rep == 0) {
            return args[0];
        } else if (max_rep < 0) {
1189
            max_rep = -1;
1190
        }
1191
    }
1192

xbe's avatar
xbe committed
1193
    // if max_rep is still -1 by this point we will need to do all possible replacements
1194

1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
    // check argument types

    if (!MP_OBJ_IS_STR(args[1])) {
        bad_implicit_conversion(args[1]);
    }

    if (!MP_OBJ_IS_STR(args[2])) {
        bad_implicit_conversion(args[2]);
    }

    // extract string data

1207
1208
1209
    GET_STR_DATA_LEN(args[0], str, str_len);
    GET_STR_DATA_LEN(args[1], old, old_len);
    GET_STR_DATA_LEN(args[2], new, new_len);
1210
1211

    // old won't exist in str if it's longer, so nothing to replace
1212
    if (old_len > str_len) {
1213
        return args[0];
1214
1215
    }

1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
    // data for the replaced string
    byte *data = NULL;
    mp_obj_t replaced_str = MP_OBJ_NULL;

    // do 2 passes over the string:
    //   first pass computes the required length of the replaced string
    //   second pass does the replacements
    for (;;) {
        machine_uint_t replaced_str_index = 0;
        machine_uint_t num_replacements_done = 0;
        const byte *old_occurrence;
        const byte *offset_ptr = str;
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
        machine_uint_t str_len_remain = str_len;
        if (old_len == 0) {
            // if old_str is empty, copy new_str to start of replaced string
            // copy the replacement string
            if (data != NULL) {
                memcpy(data, new, new_len);
            }
            replaced_str_index += new_len;
            num_replacements_done++;
        }
        while (num_replacements_done != max_rep && str_len_remain > 0 && (old_occurrence = find_subbytes(offset_ptr, str_len_remain, old, old_len, 1)) != NULL) {
            if (old_len == 0) {
                old_occurrence += 1;
            }
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
            // copy from just after end of last occurrence of to-be-replaced string to right before start of next occurrence
            if (data != NULL) {
                memcpy(data + replaced_str_index, offset_ptr, old_occurrence - offset_ptr);
            }
            replaced_str_index += old_occurrence - offset_ptr;
            // copy the replacement string
            if (data != NULL) {
                memcpy(data + replaced_str_index, new, new_len);
            }
            replaced_str_index += new_len;
            offset_ptr = old_occurrence + old_len;
1253
            str_len_remain = str + str_len - offset_ptr;
1254
1255
1256
1257
1258
            num_replacements_done++;
        }

        // copy from just after end of last occurrence of to-be-replaced string to end of old string
        if (data != NULL) {
1259
            memcpy(data + replaced_str_index, offset_ptr, str_len_remain);
1260
        }
1261
        replaced_str_index += str_len_remain;
1262
1263
1264
1265
1266
1267
1268
1269
1270

        if (data == NULL) {
            // first pass