objstr.c 55.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

xbe's avatar
xbe committed
27
#include <stdbool.h>
28
29
30
#include <string.h>
#include <assert.h>

31
#include "mpconfig.h"
32
33
#include "nlr.h"
#include "misc.h"
34
#include "qstr.h"
35
36
37
#include "obj.h"
#include "runtime0.h"
#include "runtime.h"
Dave Hylands's avatar
Dave Hylands committed
38
#include "pfenv.h"
39
#include "objstr.h"
40

41
STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t *args);
42
43
const mp_obj_t mp_const_empty_bytes;

44
45
46
47
48
49
50
51
52
// Helper macros for reading the fields of a string object that is either an
// interned qstr (MP_OBJ_IS_QSTR) or a pointer to an mp_obj_str_t.
//
// Each macro DECLARES the named local variable(s) and then fills them with an
// if/else, so it expands to several statements: use it only as a statement
// inside a block (never as the unbraced body of an if/for), and pick names
// that are not already declared in the same scope.  The object argument is
// expanded more than once, so it should be free of side effects.

// GET_STR_HASH: declares `uint str_hash` and loads the string's hash
// (qstr_hash() for a qstr, the ->hash field otherwise).
#define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; }

// GET_STR_LEN: declares `uint str_len` and loads the string's length
// (qstr_len() for a qstr, the ->len field otherwise).
#define GET_STR_LEN(str_obj_in, str_len) uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; }

// GET_STR_DATA_LEN: declares `const byte *str_data` and `uint str_len` and
// loads both the data pointer and the length (qstr_data() for a qstr, the
// ->data / ->len fields otherwise).
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; }

53
54
STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
55
STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len);
56
STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in);
xyb's avatar
xyb committed
57
58
59
60

/******************************************************************************/
/* str                                                                        */

61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len) {
    // Writes str_data[0..str_len) as a quoted literal with escapes.
    // Every output piece is a separate print() call, so this is slow on long input.
    const byte *const end = str_data + str_len;

    // Work out which quote characters occur; stop early once both were seen.
    bool seen_single = false;
    bool seen_double = false;
    const byte *p = str_data;
    while (p < end && !(seen_single && seen_double)) {
        switch (*p) {
            case '\'':
                seen_single = true;
                break;
            case '"':
                seen_double = true;
                break;
            default:
                break;
        }
        p++;
    }

    // Single quotes are the default delimiter; double quotes are chosen only
    // when the data has single quotes and no double quotes.
    int quote_char = (seen_single && !seen_double) ? '"' : '\'';

    print(env, "%c", quote_char);
    for (p = str_data; p < end; p++) {
        const byte c = *p;
        if (c == quote_char) {
            // the delimiter itself must be escaped
            print(env, "\\%c", quote_char);
        } else if (c == '\\') {
            print(env, "\\\\");
        } else if (c >= 32 && c <= 126) {
            // printable ASCII goes out unchanged
            print(env, "%c", c);
        } else {
            switch (c) {
                case '\n':
                    print(env, "\\n");
                    break;
                case '\r':
                    print(env, "\\r");
                    break;
                case '\t':
                    print(env, "\\t");
                    break;
                default:
                    // everything else becomes a two-digit hex escape
                    print(env, "\\x%02x", c);
                    break;
            }
        }
    }
    print(env, "%c", quote_char);
}

97
STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
    // Print handler shared by str and bytes objects.
    GET_STR_DATA_LEN(self_in, str_data, str_len);
    const bool is_bytes = MP_OBJ_IS_TYPE(self_in, &mp_type_bytes);

    // A str printed with PRINT_STR is emitted raw: no quotes, no escapes.
    if (!is_bytes && kind == PRINT_STR) {
        print(env, "%.*s", str_len, str_data);
        return;
    }

    // Every other case gets the quoted/escaped form; bytes carry a 'b' prefix.
    if (is_bytes) {
        print(env, "b");
    }
    mp_str_print_quoted(print, env, str_data, str_len);
}

110
STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
    // Constructor for str:
    //   str()                  -> the empty string
    //   str(obj)               -> obj rendered with PRINT_STR
    //   str(bytes, enc[, err]) -> str object sharing the bytes object's data
    // More than 3 positional arguments raises TypeError.
#if MICROPY_CPYTHON_COMPAT
    if (n_kw != 0) {
        mp_arg_error_unimpl_kw();
    }
#endif

    if (n_args == 0) {
        return MP_OBJ_NEW_QSTR(MP_QSTR_);
    }

    if (n_args == 1) {
        // render the argument into a temporary vstr, then copy it into a new str
        vstr_t *buf = vstr_new();
        mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, buf, args[0], PRINT_STR);
        mp_obj_t result = mp_obj_new_str((byte*)buf->buf, buf->len, false);
        vstr_free(buf);
        return result;
    }

    if (n_args > 3) {
        nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "str takes at most 3 arguments"));
    }

    // 2 or 3 arguments
    // TODO: validate 2nd/3rd args
    if (!MP_OBJ_IS_TYPE(args[0], &mp_type_bytes)) {
        nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes expected"));
    }
    GET_STR_DATA_LEN(args[0], str_data, str_len);
    GET_STR_HASH(args[0], str_hash);
    // no data is copied: the new object points at the source's data and reuses its hash
    mp_obj_str_t *o = str_new(&mp_type_str, NULL, str_len);
    o->data = str_data;
    o->hash = str_hash;
    return o;
}

150
151
152
153
154
STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
    // Constructor for bytes:
    //   bytes()                -> the shared empty bytes object
    //   bytes(str, enc[, err]) -> bytes object sharing the str object's data
    //   bytes(n)               -> n zero bytes (n is a small int, n >= 0)
    //   bytes(iterable)        -> one byte per item of the iterable
    // Raises TypeError for a wrong argument count and ValueError for a
    // negative count.
    if (n_args == 0) {
        return mp_const_empty_bytes;
    }

#if MICROPY_CPYTHON_COMPAT
    if (n_kw != 0) {
        mp_arg_error_unimpl_kw();
    }
#endif

    if (MP_OBJ_IS_STR(args[0])) {
        // a str source needs the encoding argument (2 or 3 args in total)
        if (n_args < 2 || n_args > 3) {
            goto wrong_args;
        }
        GET_STR_DATA_LEN(args[0], str_data, str_len);
        GET_STR_HASH(args[0], str_hash);
        // no data is copied: point at the source's data and reuse its hash
        mp_obj_str_t *o = str_new(&mp_type_bytes, NULL, str_len);
        o->data = str_data;
        o->hash = str_hash;
        return o;
    }

    if (n_args > 1) {
        goto wrong_args;
    }

    if (MP_OBJ_IS_SMALL_INT(args[0])) {
        // Reject negative counts before converting to an unsigned length;
        // otherwise -1 would turn into a huge allocation and memset.
        machine_int_t count = MP_OBJ_SMALL_INT_VALUE(args[0]);
        if (count < 0) {
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "negative count"));
        }
        uint len = count;
        byte *data;

        mp_obj_t o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
        memset(data, 0, len);
        return mp_obj_str_builder_end(o);
    }

    int len;
    byte *data;
    vstr_t *vstr = NULL;
    mp_obj_t o = NULL;
    // Try to create array of exact len if initializer len is known
    mp_obj_t len_in = mp_obj_len_maybe(args[0]);
    if (len_in == MP_OBJ_NULL) {
        // length unknown: collect into a growable vstr first
        len = -1;
        vstr = vstr_new();
    } else {
        len = MP_OBJ_SMALL_INT_VALUE(len_in);
        o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
    }

    // NOTE(review): nothing here checks that each item is a small int, nor
    // that an iterable with a known length yields exactly `len` items.
    mp_obj_t iterable = mp_getiter(args[0]);
    mp_obj_t item;
    while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {
        if (len == -1) {
            vstr_add_char(vstr, MP_OBJ_SMALL_INT_VALUE(item));
        } else {
            *data++ = MP_OBJ_SMALL_INT_VALUE(item);
        }
    }

    if (len == -1) {
        vstr_shrink(vstr);
        // TODO: Optimize, borrow buffer from vstr
        len = vstr_len(vstr);
        o = mp_obj_str_builder_start(&mp_type_bytes, len, &data);
        memcpy(data, vstr_str(vstr), len);
        vstr_free(vstr);
    }

    return mp_obj_str_builder_end(o);

wrong_args:
        nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "wrong number of arguments"));
}

225
226
// like strstr but with specified length and allows \0 bytes
// TODO replace with something more efficient/standard
227
STATIC const byte *find_subbytes(const byte *haystack, machine_uint_t hlen, const byte *needle, machine_uint_t nlen, machine_int_t direction) {
    // Searches haystack[0..hlen) for needle[0..nlen), comparing raw bytes.
    // direction > 0 scans from the front; otherwise the scan starts at the
    // last possible position and moves by `direction` each step.
    // Returns a pointer to the first match in scan order, or NULL.
    if (hlen < nlen) {
        // needle cannot fit
        return NULL;
    }

    const machine_uint_t last_pos = hlen - nlen;
    machine_uint_t pos = (direction > 0) ? 0 : last_pos;
    const machine_uint_t final_pos = (direction > 0) ? last_pos : 0;

    for (;;) {
        if (memcmp(haystack + pos, needle, nlen) == 0) {
            return haystack + pos;
        }
        if (pos == final_pos) {
            // every candidate position has been tried
            return NULL;
        }
        pos += direction;
    }
}

252
STATIC mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
    // Binary operator handler for str/bytes on the left-hand side.
    // Returns the result object, or MP_OBJ_NOT_SUPPORTED when the operator or
    // the right-hand operand type is not handled here.
    GET_STR_DATA_LEN(lhs_in, lhs_data, lhs_len);
    switch (op) {
        case MP_BINARY_OP_ADD:
        case MP_BINARY_OP_INPLACE_ADD:
            if (MP_OBJ_IS_STR(rhs_in)) {
                // concatenate into a freshly built (non-qstr) object of the lhs type
                GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
                int alloc_len = lhs_len + rhs_len;
                byte *data;
                mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), alloc_len, &data);
                memcpy(data, lhs_data, lhs_len);
                memcpy(data + lhs_len, rhs_data, rhs_len);
                return mp_obj_str_builder_end(s);
            }
            break;

        case MP_BINARY_OP_IN:
            /* NOTE `a in b` is `b.__contains__(a)` */
            if (MP_OBJ_IS_STR(rhs_in)) {
                GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
                return MP_BOOL(find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len, 1) != NULL);
            }
            break;

        case MP_BINARY_OP_MULTIPLY:
        {
            if (!MP_OBJ_IS_SMALL_INT(rhs_in)) {
                // report "unsupported" the same way as every other case here
                return MP_OBJ_NOT_SUPPORTED;
            }
            int n = MP_OBJ_SMALL_INT_VALUE(rhs_in);
            if (n < 0) {
                // a negative repeat count gives an empty result; without this
                // clamp lhs_len * n would wrap to a huge unsigned length
                n = 0;
            }
            byte *data;
            mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), lhs_len * n, &data);
            mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, data);
            return mp_obj_str_builder_end(s);
        }

        case MP_BINARY_OP_MODULO: {
            // a tuple on the right supplies several format arguments;
            // anything else is treated as a single argument
            mp_obj_t *args;
            uint n_args;
            if (MP_OBJ_IS_TYPE(rhs_in, &mp_type_tuple)) {
                // TODO: Support tuple subclasses?
                mp_obj_tuple_get(rhs_in, &n_args, &args);
            } else {
                args = &rhs_in;
                n_args = 1;
            }
            return str_modulo_format(lhs_in, n_args, args);
        }

        // These 2 are never passed here, dealt with as a special case in mp_binary_op().
        //case MP_BINARY_OP_EQUAL:
        //case MP_BINARY_OP_NOT_EQUAL:
        case MP_BINARY_OP_LESS:
        case MP_BINARY_OP_LESS_EQUAL:
        case MP_BINARY_OP_MORE:
        case MP_BINARY_OP_MORE_EQUAL:
            if (MP_OBJ_IS_STR(rhs_in)) {
                GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
                return MP_BOOL(mp_seq_cmp_bytes(op, lhs_data, lhs_len, rhs_data, rhs_len));
            }
            break;
    }

    return MP_OBJ_NOT_SUPPORTED;
}

329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
    // Subscript handler.  Only loads (value == MP_OBJ_SENTINEL) are handled;
    // anything else reports MP_OBJ_NOT_SUPPORTED.
    GET_STR_DATA_LEN(self_in, self_data, self_len);

    if (value != MP_OBJ_SENTINEL) {
        return MP_OBJ_NOT_SUPPORTED;
    }

#if MICROPY_ENABLE_SLICE
    if (MP_OBJ_IS_TYPE(index, &mp_type_slice)) {
        // slice: the result is a new str covering [start, stop)
        machine_uint_t start, stop;
        if (!m_seq_get_fast_slice_indexes(self_len, index, &start, &stop)) {
            assert(0);
        }
        return mp_obj_new_str(self_data + start, stop - start, false);
    }
#endif

    // single index: bytes yield the byte value as a small int, str yields a
    // one-byte string
    mp_obj_type_t *type = mp_obj_get_type(self_in);
    uint index_val = mp_get_index(type, self_len, index, false);
    if (type != &mp_type_bytes) {
        return mp_obj_new_str(self_data + index_val, 1, true);
    }
    return MP_OBJ_NEW_SMALL_INT((mp_small_int_t)self_data[index_val]);
}

354
STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
    // self.join(arg): concatenates the strings in arg, with self between
    // neighbouring items.  Raises TypeError if any item is not a str.
    assert(MP_OBJ_IS_STR(self_in));

    // the separator is self
    GET_STR_DATA_LEN(self_in, sep_str, sep_len);

    // obtain the items as a plain array; tuples and lists are read directly,
    // anything else is first turned into a list
    uint seq_len;
    mp_obj_t *seq_items;
    if (MP_OBJ_IS_TYPE(arg, &mp_type_tuple)) {
        mp_obj_tuple_get(arg, &seq_len, &seq_items);
    } else {
        if (!MP_OBJ_IS_TYPE(arg, &mp_type_list)) {
            // TODO: Try to optimize?
            arg = mp_type_list.make_new((mp_obj_t)&mp_type_list, 1, 0, &arg);
        }
        mp_obj_list_get(arg, &seq_len, &seq_items);
    }

    // pass 1: type-check every item and add up the output size
    int required_len = 0;
    for (uint idx = 0; idx < seq_len; idx++) {
        if (!MP_OBJ_IS_STR(seq_items[idx])) {
            nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "join expected a list of str's"));
        }
        if (idx != 0) {
            required_len += sep_len;
        }
        GET_STR_LEN(seq_items[idx], item_len);
        required_len += item_len;
    }

    // pass 2: copy separator and items into the new object
    byte *data;
    mp_obj_t joined_str = mp_obj_str_builder_start(mp_obj_get_type(self_in), required_len, &data);
    for (uint idx = 0; idx < seq_len; idx++) {
        if (idx != 0) {
            memcpy(data, sep_str, sep_len);
            data += sep_len;
        }
        GET_STR_DATA_LEN(seq_items[idx], item_data, item_len);
        memcpy(data, item_data, item_len);
        data += item_len;
    }

    return mp_obj_str_builder_end(joined_str);
}

Paul Sokolovsky's avatar
Paul Sokolovsky committed
404
405
// Whitespace test: true only for space and tab.  The argument is expanded
// twice, so pass an expression without side effects.
#define is_ws(c) ((c) == ' ' || (c) == '\t')

406
STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
    // args[0].split([sep[, maxsplit]])
    //   sep      - separator string; None or absent means "runs of is_ws() characters"
    //   maxsplit - maximum number of splits; negative or absent means no limit
    // Returns a new list of str objects.  An empty separator raises ValueError.
    mp_obj_t sep = (n_args > 1) ? args[1] : mp_const_none;
    machine_int_t splits = -1;
    if (n_args > 2) {
        splits = mp_obj_get_int(args[2]);
    }

    mp_obj_t res = mp_obj_new_list(0, NULL);
    GET_STR_DATA_LEN(args[0], s, len);
    const byte *top = s + len;

    if (sep == mp_const_none) {
        // Whitespace mode.  Leading whitespace does not count as a split,
        // so it is skipped up front.
        for (; s < top && is_ws(*s); s++) {
        }

        while (s < top && splits != 0) {
            // collect one word
            const byte *word = s;
            for (; s < top && !is_ws(*s); s++) {
            }
            mp_obj_list_append(res, mp_obj_new_str(word, s - word, false));
            if (s >= top) {
                break;
            }
            // swallow the whitespace run that follows it
            for (; s < top && is_ws(*s); s++) {
            }
            if (splits > 0) {
                splits--;
            }
        }

        // split budget exhausted: whatever is left becomes the last item
        if (s < top) {
            mp_obj_list_append(res, mp_obj_new_str(s, top - s, false));
        }
        return res;
    }

    // Explicit separator mode.
    uint sep_len;
    const char *sep_str = mp_obj_str_get_data(sep, &sep_len);
    if (sep_len == 0) {
        nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
    }

    for (;;) {
        const byte *piece = s;
        // advance to the next separator occurrence, if splitting is still allowed
        while (splits != 0 && s + sep_len <= top && memcmp(s, sep_str, sep_len) != 0) {
            s++;
        }
        if (splits == 0 || s + sep_len > top) {
            // no further split: this piece runs to the end of the string
            s = top;
        }
        mp_obj_list_append(res, mp_obj_new_str(piece, s - piece, false));
        if (s >= top) {
            break;
        }
        s += sep_len;
        if (splits > 0) {
            splits--;
        }
    }

    return res;
}

477
STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t direction, bool is_index) {
    // Shared implementation of find/rfind/index/rindex.
    //   args[0]   - string to search in
    //   args[1]   - substring to search for
    //   args[2]   - optional start position (None means 0)
    //   args[3]   - optional end position (None means len)
    //   direction - passed to find_subbytes: 1 scans forward, -1 backward
    //   is_index  - on a miss, raise ValueError (true) or return -1 (false)
    // Returns the match offset from the beginning of args[0] as a small int.
    assert(2 <= n_args && n_args <= 4);
    assert(MP_OBJ_IS_STR(args[0]));
    assert(MP_OBJ_IS_STR(args[1]));

    GET_STR_DATA_LEN(args[0], haystack, haystack_len);
    GET_STR_DATA_LEN(args[1], needle, needle_len);

    machine_uint_t start = 0;
    machine_uint_t end = haystack_len;
    if (n_args >= 3 && args[2] != mp_const_none) {
        start = mp_get_index(&mp_type_str, haystack_len, args[2], true);
    }
    if (n_args >= 4 && args[3] != mp_const_none) {
        end = mp_get_index(&mp_type_str, haystack_len, args[3], true);
    }

    // An inverted range (end < start) is empty and can never match.  It must
    // not reach find_subbytes: end - start is unsigned and would wrap to a
    // huge length, making the search read past the string.
    const byte *p = NULL;
    if (start <= end) {
        p = find_subbytes(haystack + start, end - start, needle, needle_len, direction);
    }

    if (p == NULL) {
        // not found
        if (is_index) {
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "substring not found"));
        } else {
            return MP_OBJ_NEW_SMALL_INT(-1);
        }
    } else {
        // found
        return MP_OBJ_NEW_SMALL_INT(p - haystack);
    }
}

508
STATIC mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
    // forward search; a miss is reported as -1 rather than an exception
    const machine_int_t forward = 1;
    const bool raise_on_miss = false;
    return str_finder(n_args, args, forward, raise_on_miss);
}

STATIC mp_obj_t str_rfind(uint n_args, const mp_obj_t *args) {
    // backward search; a miss is reported as -1 rather than an exception
    const machine_int_t backward = -1;
    const bool raise_on_miss = false;
    return str_finder(n_args, args, backward, raise_on_miss);
}

STATIC mp_obj_t str_index(uint n_args, const mp_obj_t *args) {
    // forward search; a miss raises ValueError
    const machine_int_t forward = 1;
    const bool raise_on_miss = true;
    return str_finder(n_args, args, forward, raise_on_miss);
}

STATIC mp_obj_t str_rindex(uint n_args, const mp_obj_t *args) {
    // backward search; a miss raises ValueError
    const machine_int_t backward = -1;
    const bool raise_on_miss = true;
    return str_finder(n_args, args, backward, raise_on_miss);
}

524
// TODO: (Much) more variety in args
525
STATIC mp_obj_t str_startswith(mp_obj_t self_in, mp_obj_t arg) {
    // True when the bytes of arg are a prefix of the bytes of self_in.
    GET_STR_DATA_LEN(self_in, str, str_len);
    GET_STR_DATA_LEN(arg, prefix, prefix_len);

    if (str_len >= prefix_len) {
        return MP_BOOL(memcmp(str, prefix, prefix_len) == 0);
    }
    // a prefix longer than the string itself can never match
    return mp_const_false;
}

534
535
536
enum { LSTRIP, RSTRIP, STRIP };

STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) {
    // Shared implementation of strip/lstrip/rstrip.
    //   type    - LSTRIP, RSTRIP or STRIP: which end(s) to trim
    //   args[0] - the string to trim
    //   args[1] - optional set of characters to remove; when absent, the
    //             whitespace characters " \t\n\r\v\f" are removed
    // Returns the trimmed string ('' when nothing is left).
    assert(1 <= n_args && n_args <= 2);
    assert(MP_OBJ_IS_STR(args[0]));

    const byte *chars_to_del;
    uint chars_to_del_len;
    static const byte whitespace[] = " \t\n\r\v\f";

    if (n_args == 1) {
        chars_to_del = whitespace;
        // -1: leave out the literal's terminating NUL, otherwise '\0' bytes
        // in the input would be stripped as if they were whitespace
        chars_to_del_len = sizeof(whitespace) - 1;
    } else {
        assert(MP_OBJ_IS_STR(args[1]));
        GET_STR_DATA_LEN(args[1], s, l);
        chars_to_del = s;
        chars_to_del_len = l;
    }

    GET_STR_DATA_LEN(args[0], orig_str, orig_str_len);

    machine_uint_t first_good_char_pos = 0;
    bool first_good_char_pos_set = false;
    machine_uint_t last_good_char_pos = 0;

    // LSTRIP and STRIP walk forward from index 0; RSTRIP walks backward from
    // the last index.  `len` only counts iterations, `i` is the position.
    machine_uint_t i = 0;
    machine_int_t delta = 1;
    if (type == RSTRIP) {
        i = orig_str_len - 1;
        delta = -1;
    }
    for (machine_uint_t len = orig_str_len; len > 0; len--) {
        if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) {
            // orig_str[i] is a character to keep
            if (!first_good_char_pos_set) {
                first_good_char_pos_set = true;
                first_good_char_pos = i;
                if (type == LSTRIP) {
                    // keep everything from here to the end
                    last_good_char_pos = orig_str_len - 1;
                    break;
                } else if (type == RSTRIP) {
                    // keep everything from the start up to here
                    first_good_char_pos = 0;
                    last_good_char_pos = i;
                    break;
                }
            }
            last_good_char_pos = i;
        }
        i += delta;
    }

    if (!first_good_char_pos_set) {
        // No character survived (or the input was empty).  The flag is used
        // rather than comparing both positions with 0, because a result made
        // of the single character at index 0 also has both positions at 0.
        return MP_OBJ_NEW_QSTR(MP_QSTR_);
    }

    assert(last_good_char_pos >= first_good_char_pos);
    // +1 to accommodate the last character
    machine_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
    return mp_obj_new_str(orig_str + first_good_char_pos, stripped_len, false);
}

595
596
597
598
599
600
601
602
603
604
605
606
STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
    // trim both ends
    const int which = STRIP;
    return str_uni_strip(which, n_args, args);
}

STATIC mp_obj_t str_lstrip(uint n_args, const mp_obj_t *args) {
    // trim the left end only
    const int which = LSTRIP;
    return str_uni_strip(which, n_args, args);
}

STATIC mp_obj_t str_rstrip(uint n_args, const mp_obj_t *args) {
    // trim the right end only
    const int which = RSTRIP;
    return str_uni_strip(which, n_args, args);
}

Dave Hylands's avatar
Dave Hylands committed
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
// Parses a run of decimal digits at the start of str.
// Only unsigned numbers are recognised (no sign character), even though the
// result is stored in an int.  *num is written only when at least one digit
// was parsed.  Returns the number of characters consumed (0 if none).
static int str_to_int(const char *str, int *num) {
    if (!unichar_isdigit(*str)) {
        // no digits: leave *num untouched
        return 0;
    }

    int value = 0;
    const char *s = str;
    while (unichar_isdigit(*s)) {
        value = value * 10 + (*s - '0');
        s++;
    }
    *num = value;
    return s - str;
}

// True when ch is one of the format-spec alignment characters: < > = ^
// (the NUL character is never an alignment character).
static bool isalignment(char ch) {
    switch (ch) {
        case '<':
        case '>':
        case '=':
        case '^':
            return true;
        default:
            return false;
    }
}

// True when ch is one of the format-spec presentation type characters:
// b c d e E f F g G n o s x X %
// (the NUL character is never a type character).
static bool istype(char ch) {
    switch (ch) {
        case 'b':
        case 'c':
        case 'd':
        case 'e':
        case 'E':
        case 'f':
        case 'F':
        case 'g':
        case 'G':
        case 'n':
        case 'o':
        case 's':
        case 'x':
        case 'X':
        case '%':
            return true;
        default:
            return false;
    }
}

// True when arg is a bool object or satisfies MP_OBJ_IS_INT.
static bool arg_looks_integer(mp_obj_t arg) {
    if (MP_OBJ_IS_TYPE(arg, &mp_type_bool)) {
        return true;
    }
    return MP_OBJ_IS_INT(arg);
}

// True when arg looks like an integer (see arg_looks_integer) or, in builds
// with MICROPY_ENABLE_FLOAT, is a float object.
static bool arg_looks_numeric(mp_obj_t arg) {
    if (arg_looks_integer(arg)) {
        return true;
    }
#if MICROPY_ENABLE_FLOAT
    return MP_OBJ_IS_TYPE(arg, &mp_type_float);
#else
    return false;
#endif
}

642
// Returns arg unchanged unless it is a float object (float-enabled builds
// only), in which case its value is converted to a small int.
static mp_obj_t arg_as_int(mp_obj_t arg) {
#if MICROPY_ENABLE_FLOAT
    if (!MP_OBJ_IS_TYPE(arg, &mp_type_float)) {
        return arg;
    }

    // TODO: Needs a way to construct an mpz integer from a float

    // the float-to-integer conversion happens in this initialisation
    mp_small_int_t as_small_int = mp_obj_get_float(arg);
    return MP_OBJ_NEW_SMALL_INT(as_small_int);
#else
    return arg;
#endif
}

655
mp_obj_t mp_obj_str_format(uint n_args, const mp_obj_t *args) {
656
    assert(MP_OBJ_IS_STR(args[0]));
657

658
    GET_STR_DATA_LEN(args[0], str, len);
Dave Hylands's avatar
Dave Hylands committed
659
    int arg_i = 0;
660
    vstr_t *vstr = vstr_new();
Dave Hylands's avatar
Dave Hylands committed
661
662
663
664
    pfenv_t pfenv_vstr;
    pfenv_vstr.data = vstr;
    pfenv_vstr.print_strn = pfenv_vstr_add_strn;

665
    for (const byte *top = str + len; str < top; str++) {
Dave Hylands's avatar
Dave Hylands committed
666
667
668
669
670
671
        if (*str == '}') {
            str++;
            if (str < top && *str == '}') {
                vstr_add_char(vstr, '}');
                continue;
            }
672
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Single '}' encountered in format string"));
Dave Hylands's avatar
Dave Hylands committed
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
        }
        if (*str != '{') {
            vstr_add_char(vstr, *str);
            continue;
        }

        str++;
        if (str < top && *str == '{') {
            vstr_add_char(vstr, '{');
            continue;
        }

        // replacement_field ::=  "{" [field_name] ["!" conversion] [":" format_spec] "}"

        vstr_t *field_name = NULL;
        char conversion = '\0';
        vstr_t *format_spec = NULL;

        if (str < top && *str != '}' && *str != '!' && *str != ':') {
            field_name = vstr_new();
            while (str < top && *str != '}' && *str != '!' && *str != ':') {
                vstr_add_char(field_name, *str++);
            }
            vstr_add_char(field_name, '\0');
        }

        // conversion ::=  "r" | "s"

        if (str < top && *str == '!') {
702
            str++;
Dave Hylands's avatar
Dave Hylands committed
703
704
            if (str < top && (*str == 'r' || *str == 's')) {
                conversion = *str++;
705
            } else {
706
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "end of format while looking for conversion specifier"));
Dave Hylands's avatar
Dave Hylands committed
707
708
709
710
711
712
713
714
715
716
717
718
719
720
            }
        }

        if (str < top && *str == ':') {
            str++;
            // {:} is the same as {}, which is the same as {!s}
            // This makes a difference when passing in a True or False
            // '{}'.format(True) returns 'True'
            // '{:d}'.format(True) returns '1'
            // So we treat {:} as {} and this later gets treated to be {!s}
            if (*str != '}') {
                format_spec = vstr_new(); 
                while (str < top && *str != '}') {
                    vstr_add_char(format_spec, *str++);
721
                }
Dave Hylands's avatar
Dave Hylands committed
722
723
724
725
                vstr_add_char(format_spec, '\0');
            }
        }
        if (str >= top) {
726
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "unmatched '{' in format"));
Dave Hylands's avatar
Dave Hylands committed
727
728
        }
        if (*str != '}') {
729
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "expected ':' after format specifier"));
Dave Hylands's avatar
Dave Hylands committed
730
731
732
733
734
735
        }

        mp_obj_t arg = mp_const_none;

        if (field_name) {
            if (arg_i > 0) {
736
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "cannot switch from automatic field numbering to manual field specification"));
Dave Hylands's avatar
Dave Hylands committed
737
            }
738
            int index = 0;
Dave Hylands's avatar
Dave Hylands committed
739
            if (str_to_int(vstr_str(field_name), &index) != vstr_len(field_name) - 1) {
740
                nlr_raise(mp_obj_new_exception_msg(&mp_type_KeyError, "attributes not supported yet"));
741
            }
Dave Hylands's avatar
Dave Hylands committed
742
            if (index >= n_args - 1) {
743
                nlr_raise(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
Dave Hylands's avatar
Dave Hylands committed
744
745
746
747
748
            }
            arg = args[index + 1];
            arg_i = -1;
            vstr_free(field_name);
            field_name = NULL;
749
        } else {
Dave Hylands's avatar
Dave Hylands committed
750
            if (arg_i < 0) {
751
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "cannot switch from manual field specification to automatic field numbering"));
Dave Hylands's avatar
Dave Hylands committed
752
753
            }
            if (arg_i >= n_args - 1) {
754
                nlr_raise(mp_obj_new_exception_msg(&mp_type_IndexError, "tuple index out of range"));
Dave Hylands's avatar
Dave Hylands committed
755
756
757
758
759
760
761
762
763
764
765
766
767
768
            }
            arg = args[arg_i + 1];
            arg_i++;
        }
        if (!format_spec && !conversion) {
            conversion = 's';
        }
        if (conversion) {
            mp_print_kind_t print_kind;
            if (conversion == 's') {
                print_kind = PRINT_STR;
            } else if (conversion == 'r') {
                print_kind = PRINT_REPR;
            } else {
769
                nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError, "Unknown conversion specifier %c", conversion));
Dave Hylands's avatar
Dave Hylands committed
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
            }
            vstr_t *arg_vstr = vstr_new();
            mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, arg_vstr, arg, print_kind);
            arg = mp_obj_new_str((const byte *)vstr_str(arg_vstr), vstr_len(arg_vstr), false);
            vstr_free(arg_vstr);
        }

        char sign = '\0';
        char fill = '\0';
        char align = '\0';
        int width = -1;
        int precision = -1;
        char type = '\0';
        int flags = 0;

        if (format_spec) {
            // The format specifier (from http://docs.python.org/2/library/string.html#formatspec)
            //
            // [[fill]align][sign][#][0][width][,][.precision][type]
            // fill        ::=  <any character>
            // align       ::=  "<" | ">" | "=" | "^"
            // sign        ::=  "+" | "-" | " "
            // width       ::=  integer
            // precision   ::=  integer
            // type        ::=  "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%"

            const char *s = vstr_str(format_spec);
            if (isalignment(*s)) {
                align = *s++;
            } else if (*s && isalignment(s[1])) {
                fill = *s++;
                align = *s++;
            }
            if (*s == '+' || *s == '-' || *s == ' ') {
                if (*s == '+') {
                    flags |= PF_FLAG_SHOW_SIGN;
                } else if (*s == ' ') {
                    flags |= PF_FLAG_SPACE_SIGN;
                }
                sign = *s++;
            }
            if (*s == '#') {
                flags |= PF_FLAG_SHOW_PREFIX;
                s++;
            }
            if (*s == '0') {
                if (!align) {
                    align = '=';
                }
                if (!fill) {
                    fill = '0';
                }
            }
            s += str_to_int(s, &width);
            if (*s == ',') {
                flags |= PF_FLAG_SHOW_COMMA;
                s++;
            }
            if (*s == '.') {
                s++;
                s += str_to_int(s, &precision);
            }
            if (istype(*s)) {
                type = *s++;
            }
            if (*s) {
836
                nlr_raise(mp_obj_new_exception_msg(&mp_type_KeyError, "Invalid conversion specification"));
Dave Hylands's avatar
Dave Hylands committed
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
            }
            vstr_free(format_spec);
            format_spec = NULL;
        }
        if (!align) {
            if (arg_looks_numeric(arg)) {
                align = '>';
            } else {
                align = '<';
            }
        }
        if (!fill) {
            fill = ' ';
        }

        if (sign) {
            if (type == 's') {
854
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed in string format specifier"));
Dave Hylands's avatar
Dave Hylands committed
855
856
            }
            if (type == 'c') {
857
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Sign not allowed with integer format specifier 'c'"));
Dave Hylands's avatar
Dave Hylands committed
858
859
860
861
862
863
864
865
866
867
868
869
870
871
            }
        } else {
            sign = '-';
        }

        switch (align) {
            case '<': flags |= PF_FLAG_LEFT_ADJUST;     break;
            case '=': flags |= PF_FLAG_PAD_AFTER_SIGN;  break;
            case '^': flags |= PF_FLAG_CENTER_ADJUST;   break;
        }

        if (arg_looks_integer(arg)) {
            switch (type) {
                case 'b':
872
                    pfenv_print_mp_int(&pfenv_vstr, arg, 1, 2, 'a', flags, fill, width);
Dave Hylands's avatar
Dave Hylands committed
873
874
875
876
877
878
879
880
881
882
883
884
                    continue;

                case 'c':
                {
                    char ch = mp_obj_get_int(arg);
                    pfenv_print_strn(&pfenv_vstr, &ch, 1, flags, fill, width);
                    continue;
                }

                case '\0':  // No explicit format type implies 'd'
                case 'n':   // I don't think we support locales in uPy so use 'd'
                case 'd':
885
                    pfenv_print_mp_int(&pfenv_vstr, arg, 1, 10, 'a', flags, fill, width);
Dave Hylands's avatar
Dave Hylands committed
886
887
888
                    continue;

                case 'o':
889
890
891
892
                    if (flags & PF_FLAG_SHOW_PREFIX) {
                        flags |= PF_FLAG_SHOW_OCTAL_LETTER;
                    }

893
                    pfenv_print_mp_int(&pfenv_vstr, arg, 1, 8, 'a', flags, fill, width);
Dave Hylands's avatar
Dave Hylands committed
894
895
896
                    continue;

                case 'x':
897
                    pfenv_print_mp_int(&pfenv_vstr, arg, 1, 16, 'a', flags, fill, width);
Dave Hylands's avatar
Dave Hylands committed
898
899
900
                    continue;

                case 'X':
901
                    pfenv_print_mp_int(&pfenv_vstr, arg, 1, 16, 'A', flags, fill, width);
Dave Hylands's avatar
Dave Hylands committed
902
903
904
905
906
907
908
909
910
911
912
913
914
915
                    continue;

                case 'e':
                case 'E':
                case 'f':
                case 'F':
                case 'g':
                case 'G':
                case '%':
                    // The floating point formatters all work with anything that
                    // looks like an integer
                    break;

                default:
916
                    nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
Dave Hylands's avatar
Dave Hylands committed
917
918
                        "Unknown format code '%c' for object of type '%s'", type, mp_obj_get_type_str(arg)));
            }
919
        }
920

921
922
        // NOTE: no else here. We need the e, f, g etc formats for integer
        //       arguments (from above if) to take this if.
923
        if (arg_looks_numeric(arg)) {
Dave Hylands's avatar
Dave Hylands committed
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
            if (!type) {

                // Even though the docs say that an unspecified type is the same
                // as 'g', there is one subtle difference, when the exponent
                // is one less than the precision.
                //  
                // '{:10.1}'.format(0.0) ==> '0e+00'
                // '{:10.1g}'.format(0.0) ==> '0'
                //
                // TODO: Figure out how to deal with this.
                //
                // A proper solution would involve adding a special flag
                // or something to format_float, and create a format_double
                // to deal with doubles. In order to fix this when using
                // sprintf, we'd need to use the e format and tweak the
                // returned result to strip trailing zeros like the g format
                // does.
                //
                // {:10.3} and {:10.2e} with 1.23e2 both produce 1.23e+02
                // but with 1.e2 you get 1e+02 and 1.00e+02
                //
                // Stripping the trailing 0's (like g does) would make the
                // e format give us the right format.
                //
                // CPython sources say:
                //   Omitted type specifier.  Behaves in the same way as repr(x)
                //   and str(x) if no precision is given, else like 'g', but with
                //   at least one digit after the decimal point. */

                type = 'g';
            }
            if (type == 'n') {
                type = 'g';
            }

            flags |= PF_FLAG_PAD_NAN_INF; // '{:06e}'.format(float('-inf')) should give '-00inf'
            switch (type) {
961
#if MICROPY_ENABLE_FLOAT
Dave Hylands's avatar
Dave Hylands committed
962
963
964
965
966
967
968
969
970
971
972
973
974
                case 'e':
                case 'E':
                case 'f':
                case 'F':
                case 'g':
                case 'G':
                    pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg), type, flags, fill, width, precision); 
                    break;

                case '%':
                    flags |= PF_FLAG_ADD_PERCENT;
                    pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg) * 100.0F, 'f', flags, fill, width, precision);
                    break;
975
#endif
Dave Hylands's avatar
Dave Hylands committed
976
977

                default:
978
                    nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
Dave Hylands's avatar
Dave Hylands committed
979
980
981
982
                        "Unknown format code '%c' for object of type 'float'",
                        type, mp_obj_get_type_str(arg)));
            }
        } else {
983
984
            // arg doesn't look like a number

Dave Hylands's avatar
Dave Hylands committed
985
            if (align == '=') {
986
                nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "'=' alignment not allowed in string format specifier"));
Dave Hylands's avatar
Dave Hylands committed
987
            }
988

Dave Hylands's avatar
Dave Hylands committed
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
            switch (type) {
                case '\0':
                    mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, arg, PRINT_STR);
                    break;

                case 's':
                {
                    uint len;
                    const char *s = mp_obj_str_get_data(arg, &len);
                    if (precision < 0) {
                        precision = len;
                    }
                    if (len > precision) {
                        len = precision;
                    }
                    pfenv_print_strn(&pfenv_vstr, s, len, flags, fill, width);
                    break;
                }

                default:
1009
                    nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
Dave Hylands's avatar
Dave Hylands committed
1010
1011
1012
                        "Unknown format code '%c' for object of type 'str'",
                        type, mp_obj_get_type_str(arg)));
            }
1013
1014
1015
        }
    }

1016
1017
1018
    mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
    vstr_free(vstr);
    return s;
1019
1020
}

1021
1022
1023
1024
// Implements printf-style formatting for the str '%' operator.
//
// pattern - the format string (must be a str object)
// n_args  - number of entries in args
// args    - the values to substitute, consumed left to right
//
// Each specifier has the form %[flags][width][.precision]conversion where
//   flags      : '-' left adjust, '+' always show sign, ' ' space for sign,
//                '#' alternate form (prefix for o/x/X), '0' zero padding
//   width/prec : decimal digits, or '*' to take the value from the next arg
//   conversion : c d i u o r s x X (and e E f F g G if MICROPY_ENABLE_FLOAT)
// "%%" emits a literal '%'.
//
// Returns a newly created str object holding the formatted text.
//
// Raises:
//   TypeError  - fewer or more arguments than the pattern consumes, or an
//                argument that is unsuitable for %c
//   ValueError - the pattern ends in the middle of a specifier, or uses an
//                unsupported conversion character
STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, uint n_args, const mp_obj_t *args) {
    assert(MP_OBJ_IS_STR(pattern));

    GET_STR_DATA_LEN(pattern, str, len);
    const byte *start_str = str;    // kept for reporting the index of a bad specifier
    int arg_i = 0;                  // index of the next unconsumed argument
    vstr_t *vstr = vstr_new();
    pfenv_t pfenv_vstr;
    pfenv_vstr.data = vstr;
    pfenv_vstr.print_strn = pfenv_vstr_add_strn;

    for (const byte *top = str + len; str < top; str++) {
        if (*str != '%') {
            // ordinary character, copy through unchanged
            vstr_add_char(vstr, *str);
            continue;
        }
        if (++str >= top) {
            // A lone '%' at the very end of the pattern is an error (as in
            // CPython) rather than something to drop silently.
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "incomplete format"));
        }
        if (*str == '%') {
            vstr_add_char(vstr, '%');
            continue;
        }
        if ((uint)arg_i >= n_args) {
            nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "not enough arguments for format string"));
        }

        // parse flags
        int flags = 0;
        char fill = ' ';
        bool alt = false;
        while (str < top) {
            if (*str == '-')      flags |= PF_FLAG_LEFT_ADJUST;
            else if (*str == '+') flags |= PF_FLAG_SHOW_SIGN;
            else if (*str == ' ') flags |= PF_FLAG_SPACE_SIGN;
            else if (*str == '#') alt = true;
            else if (*str == '0') {
                flags |= PF_FLAG_PAD_AFTER_SIGN;
                fill = '0';
            } else break;
            str++;
        }

        // parse width, if it exists
        int width = 0;
        if (str < top) {
            if (*str == '*') {
                // arg_i is known to be in range here thanks to the check above
                width = mp_obj_get_int(args[arg_i++]);
                str++;
            } else {
                for (; str < top && '0' <= *str && *str <= '9'; str++) {
                    width = width * 10 + *str - '0';
                }
            }
        }

        // parse precision, if it exists (-1 means "not specified")
        int prec = -1;
        if (str < top && *str == '.') {
            if (++str < top) {
                if (*str == '*') {
                    // A preceding '*' width may already have used the last argument.
                    if ((uint)arg_i >= n_args) {
                        nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "not enough arguments for format string"));
                    }
                    prec = mp_obj_get_int(args[arg_i++]);
                    str++;
                } else {
                    prec = 0;
                    for (; str < top && '0' <= *str && *str <= '9'; str++) {
                        prec = prec * 10 + *str - '0';
                    }
                }
            }
        }

        if (str >= top) {
            nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "incomplete format"));
        }

        // '*' width/precision may have consumed arguments, so make sure there
        // is still one left for the conversion itself before reading it.
        if ((uint)arg_i >= n_args) {
            nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "not enough arguments for format string"));
        }
        mp_obj_t arg = args[arg_i];

        switch (*str) {
            case 'c':
                // accepts a string of length 1 or an integer; always padded with spaces
                if (MP_OBJ_IS_STR(arg)) {
                    uint len;
                    const char *s = mp_obj_str_get_data(arg, &len);
                    if (len != 1) {
                        nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "%c requires int or char"));
                        break;
                    }
                    pfenv_print_strn(&pfenv_vstr, s, 1, flags, ' ', width);
                    break;
                }
                if (arg_looks_integer(arg)) {
                    char ch = mp_obj_get_int(arg);
                    pfenv_print_strn(&pfenv_vstr, &ch, 1, flags, ' ', width);
                    break;
                }
#if MICROPY_ENABLE_FLOAT
                // This is what CPython reports, so we report the same.
                if (MP_OBJ_IS_TYPE(arg, &mp_type_float)) {
                    nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "integer argument expected, got float"));
                }
#endif
                nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "an integer is required"));
                break;

            case 'd':
            case 'i':
            case 'u':
                pfenv_print_mp_int(&pfenv_vstr, arg_as_int(arg), 1, 10, 'a', flags, fill, width);
                break;

#if MICROPY_ENABLE_FLOAT
            case 'e':
            case 'E':
            case 'f':
            case 'F':
            case 'g':
            case 'G':
                pfenv_print_float(&pfenv_vstr, mp_obj_get_float(arg), *str, flags, fill, width, prec);
                break;
#endif

            case 'o':
                if (alt) {
                    flags |= (PF_FLAG_SHOW_PREFIX | PF_FLAG_SHOW_OCTAL_LETTER);
                }
                pfenv_print_mp_int(&pfenv_vstr, arg_as_int(arg), 1, 8, 'a', flags, fill, width);
                break;

            case 'r':
            case 's':
            {
                // render the object into a temporary buffer, then truncate it
                // to the precision (if one was given) and pad with spaces
                vstr_t *arg_vstr = vstr_new();
                mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf,
                                    arg_vstr, arg, *str == 'r' ? PRINT_REPR : PRINT_STR);
                uint len = vstr_len(arg_vstr);
                if (prec < 0) {
                    prec = len;
                }
                if (len > prec) {
                    len = prec;
                }
                pfenv_print_strn(&pfenv_vstr, vstr_str(arg_vstr), len, flags, ' ', width);
                vstr_free(arg_vstr);
                break;
            }

            case 'x':
                if (alt) {
                    flags |= PF_FLAG_SHOW_PREFIX;
                }
                pfenv_print_mp_int(&pfenv_vstr, arg_as_int(arg), 1, 16, 'a', flags, fill, width);
                break;

            case 'X':
                if (alt) {
                    flags |= PF_FLAG_SHOW_PREFIX;
                }
                pfenv_print_mp_int(&pfenv_vstr, arg_as_int(arg), 1, 16, 'A', flags, fill, width);
                break;

            default:
                // the pointer difference is cast so that it matches the %d conversion
                nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_ValueError,
                    "unsupported format character '%c' (0x%x) at index %d",
                    *str, *str, (int)(str - start_str)));
        }
        arg_i++;
    }

    // every supplied argument must have been consumed by the pattern
    if ((uint)arg_i != n_args) {
        nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "not all arguments converted during string formatting"));
    }

    mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
    vstr_free(vstr);
    return s;
}

1192
STATIC mp_obj_t str_replace(uint n_args, const mp_obj_t *args) {
1193
1194
    assert(MP_OBJ_IS_STR(args[0]));

1195
    machine_int_t max_rep = -1;
1196
    if (n_args == 4) {
1197
        max_rep = mp_obj_get_int(args[3]);
1198
1199
1200
        if (max_rep == 0) {
            return args[0];
        } else if (max_rep < 0) {
1201
            max_rep = -1;
1202
        }
1203
    }
1204

xbe's avatar
xbe committed
1205
    // if max_rep is still -1 by this point we will need to do all possible replacements
1206

1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
    // check argument types

    if (!MP_OBJ_IS_STR(args[1])) {
        bad_implicit_conversion(args[1]);
    }

    if (!MP_OBJ_IS_STR(args[2])) {
        bad_implicit_conversion(args[2]);
    }

    // extract string data

1219
1220
1221
    GET_STR_DATA_LEN(args[0], str, str_len);
    GET_STR_DATA_LEN(args[1], old, old_len);
    GET_STR_DATA_LEN(args[2], new, new_len);
1222
1223

    // old won't exist in str if it's longer, so nothing to replace
1224
    if (old_len > str_len) {
1225
        return args[0];
1226
1227
    }

1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
    // data for the replaced string
    byte *data = NULL;
    mp_obj_t replaced_str = MP_OBJ_NULL;

    // do 2 passes over the string:
    //   first pass computes the required length of the replaced string
    //   second pass does the replacements
    for (;;) {
        machine_uint_t replaced_str_index = 0;
        machine_uint_t num_replacements_done = 0;
        const byte *old_occurrence;
        const byte *offset_ptr = str;
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
        machine_uint_t str_len_remain = str_len;
        if (old_len == 0) {
            // if old_str is empty, copy new_str to start of replaced string
            // copy the replacement string
            if (data != NULL) {
                memcpy(data, new, new_len);
            }
            replaced_str_index += new_len;
            num_replacements_done++;
        }
        while (num_replacements_done != max_rep && str_len_remain > 0 && (old_occurrence = find_subbytes(offset_ptr, str_len_remain, old, old_len, 1)) != NULL) {
            if (old_len == 0) {
                old_occurrence += 1;
            }
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
            // copy from just after end of last occurrence of to-be-replaced string to right before start of next occurrence
            if (data != NULL) {
                memcpy(data + replaced_str_index, offset_ptr, old_occurrence - offset_ptr);
            }
            replaced_str_index += old_occurrence - offset_ptr;
            // copy the replacement string
            if (data != NULL) {
                memcpy(data + replaced_str_index, new, new_len);
            }
            replaced_str_index += new_len;
            offset_ptr = old_occurrence + old_len;
1265
            str_len_remain = str + str_len - offset_ptr;
1266
1267
1268
1269
1270
            num_replacements_done++;
        }