--- /dev/null
+.. _cookbook:
+
+{{ header }}
+
+.. _cookbook.idioms:
+.. _cookbook.selection:
+.. _cookbook.multi_index:
+.. _cookbook.missing_data:
+.. _cookbook.grouping:
+.. _cookbook.pivot:
+.. _cookbook.resample:
+.. _cookbook.merge:
+.. _cookbook.plotting:
+.. _cookbook.csv:
+.. _cookbook.csv.multiple_files:
+.. _cookbook.sql:
+.. _cookbook.excel:
+.. _cookbook.html:
+.. _cookbook.hdf:
+.. _cookbook.binary:
+
+This page has been removed for copyright reasons.
enhancingperf
sparse
gotchas
- cookbook
# Replace all " with "".
# Wrap the entire thing in double quotes.
- uname = _get_unicode_name(name)
- if not len(uname):
+ name = _get_unicode_name(name)
+ if not len(name):
raise ValueError("Empty table or column name specified")
- nul_index = uname.find("\x00")
- if nul_index >= 0:
+ if '\0' in name:
raise ValueError("SQLite identifier cannot contain NULs")
- return '"' + uname.replace('"', '""') + '"'
+ name = name.replace('"', '""')
+ return '"' + name + '"'
_SAFE_NAMES_WARNING = (
from pandas.io.formats.printing import pprint_thing
-def test_agg_api():
- # GH 6337
- # http://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error
- # different api for agg when passed custom function with mixed frame
-
- df = DataFrame(
- {
- "data1": np.random.randn(5),
- "data2": np.random.randn(5),
- "key1": ["a", "a", "b", "b", "a"],
- "key2": ["one", "two", "one", "two", "one"],
- }
- )
- grouped = df.groupby("key1")
-
- def peak_to_peak(arr):
- return arr.max() - arr.min()
-
- expected = grouped.agg([peak_to_peak])
- expected.columns = ["data1", "data2"]
- result = grouped.agg(peak_to_peak)
- tm.assert_frame_equal(result, expected)
-
-
def test_agg_datetimes_mixed():
data = [[1, "2012-01-01", 1.0], [2, "2012-01-02", 2.0], [3, None, 3.0]]
tm.assert_numpy_array_equal(result, exp)
-def test_sort():
-
- # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby # noqa: E501
- # This should result in a properly sorted Series so that the plot
- # has a sorted x axis
- # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar')
-
- df = DataFrame({"value": np.random.randint(0, 10000, 100)})
- labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
- cat_labels = Categorical(labels, labels)
-
- df = df.sort_values(by=["value"], ascending=True)
- df["value_group"] = pd.cut(
- df.value, range(0, 10500, 500), right=False, labels=cat_labels
- )
-
- res = df.groupby(["value_group"], observed=False)["value_group"].count()
- exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))]
- exp.index = CategoricalIndex(exp.index, name=exp.index.name)
- tm.assert_series_equal(res, exp)
-
def test_sort2():
# dataframe groupby sort was being ignored # GH 8868
import pandas.util.testing as tm
-def test_detect_chained_assignment():
- # Inplace ops, originally from:
- # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
- a = [12, 23]
- b = [123, None]
- c = [1234, 2345]
- d = [12345, 23456]
- tuples = [("eyes", "left"), ("eyes", "right"), ("ears", "left"), ("ears", "right")]
- events = {
- ("eyes", "left"): a,
- ("eyes", "right"): b,
- ("ears", "left"): c,
- ("ears", "right"): d,
- }
- multiind = MultiIndex.from_tuples(tuples, names=["part", "side"])
- zed = DataFrame(events, index=["a", "b"], columns=multiind)
-
- with pytest.raises(com.SettingWithCopyError):
- zed["eyes"]["right"].fillna(value=555, inplace=True)
-
def test_cache_updating():
# 5216
with pytest.raises(TypeError):
df.loc["bar"] *= 2
- # from SO
- # http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
- df_orig = DataFrame.from_dict(
- {
- "price": {
- ("DE", "Coal", "Stock"): 2,
- ("DE", "Gas", "Stock"): 4,
- ("DE", "Elec", "Demand"): 1,
- ("FR", "Gas", "Stock"): 5,
- ("FR", "Solar", "SupIm"): 0,
- ("FR", "Wind", "SupIm"): 0,
- }
- }
- )
- df_orig.index = MultiIndex.from_tuples(
- df_orig.index, names=["Sit", "Com", "Type"]
- )
-
- expected = df_orig.copy()
- expected.iloc[[0, 2, 3]] *= 2
-
- idx = pd.IndexSlice
- df = df_orig.copy()
- df.loc[idx[:, :, "Stock"], :] *= 2
- tm.assert_frame_equal(df, expected)
-
- df = df_orig.copy()
- df.loc[idx[:, :, "Stock"], "price"] *= 2
- tm.assert_frame_equal(df, expected)
def test_multiindex_assignment(self):
df["column1"] = df["column1"] + "c"
str(df)
- # from SO:
- # http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
- df = DataFrame(np.arange(0, 9), columns=["count"])
- df["group"] = "b"
-
- with pytest.raises(com.SettingWithCopyError):
- df.iloc[0:5]["group"] = "a"
# Mixed type setting but same dtype & changing dtype
df = DataFrame(
result2 = s.loc[0:3]
tm.assert_series_equal(result1, result2)
- def test_ix_weird_slicing(self):
- # http://stackoverflow.com/q/17056560/1240268
- df = DataFrame({"one": [1, 2, 3, np.nan, np.nan], "two": [1, 2, 3, 4, 5]})
- df.loc[df["one"] > 1, "two"] = -df["two"]
-
- expected = DataFrame(
- {
- "one": {0: 1.0, 1: 2.0, 2: 3.0, 3: np.nan, 4: np.nan},
- "two": {0: 1, 1: -2, 2: -3, 3: 4, 4: 5},
- }
- )
- tm.assert_frame_equal(df, expected)
-
def test_ix_assign_column_mixed(self, float_frame):
# GH #1142
df = float_frame
tm.assert_frame_equal(result, expected)
-def test_escapechar(all_parsers):
- # http://stackoverflow.com/questions/13824840/feature-request-for-
- # pandas-read-csv
- data = '''SEARCH_TERM,ACTUAL_URL
-"bra tv bord","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"
-"tv p\xc3\xa5 hjul","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"
-"SLAGBORD, \\"Bergslagen\\", IKEA:s 1700-tals serie","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"''' # noqa
-
- parser = all_parsers
- result = parser.read_csv(
- StringIO(data), escapechar="\\", quotechar='"', encoding="utf-8"
- )
-
- assert result["SEARCH_TERM"][2] == (
- 'SLAGBORD, "Bergslagen", ' "IKEA:s 1700-tals serie"
- )
- tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"]))
-
-
def test_int64_min_issues(all_parsers):
# see gh-2599
parser = all_parsers